summaryrefslogtreecommitdiff
path: root/compiler/luci-interpreter
diff options
context:
space:
mode:
authorChunseok Lee <chunseok.lee@samsung.com>2022-04-15 19:15:11 +0900
committerChunseok Lee <chunseok.lee@samsung.com>2022-04-15 19:15:11 +0900
commit3ad689f0803519e343c36d5700646e86059df961 (patch)
tree862346c401a5577518fa7f042532aa931b53aa0e /compiler/luci-interpreter
parentac6e4dd7b480e83b586ef533d7b29a8a97eb48fe (diff)
downloadnnfw-3ad689f0803519e343c36d5700646e86059df961.tar.gz
nnfw-3ad689f0803519e343c36d5700646e86059df961.tar.bz2
nnfw-3ad689f0803519e343c36d5700646e86059df961.zip
Imported Upstream version 1.20.0upstream/1.20.0submit/tizen/20220415.103159
Diffstat (limited to 'compiler/luci-interpreter')
-rw-r--r--compiler/luci-interpreter/README.md2
-rw-r--r--compiler/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h35
-rw-r--r--compiler/luci-interpreter/include/luci_interpreter/Interpreter.h5
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst4
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h124
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h163
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h192
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h44
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALFullyConnected.h114
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALMul.h18
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h44
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/PALSVDF.h190
-rw-r--r--compiler/luci-interpreter/pal/cmsisnn/pal.cmake9
-rw-r--r--compiler/luci-interpreter/pal/linux/KernelsToBuild.lst7
-rw-r--r--compiler/luci-interpreter/pal/linux/PALAveragePool2d.h73
-rw-r--r--compiler/luci-interpreter/pal/linux/PALBatchMatMul.h67
-rw-r--r--compiler/luci-interpreter/pal/linux/PALConv2d.h72
-rw-r--r--compiler/luci-interpreter/pal/linux/PALDepthwiseConv2d.h91
-rw-r--r--compiler/luci-interpreter/pal/linux/PALDequantize.h34
-rw-r--r--compiler/luci-interpreter/pal/linux/PALFullyConnected.h61
-rw-r--r--compiler/luci-interpreter/pal/linux/PALGather.h35
-rw-r--r--compiler/luci-interpreter/pal/linux/PALMul.h28
-rw-r--r--compiler/luci-interpreter/pal/linux/PALQuantize.h44
-rw-r--r--compiler/luci-interpreter/pal/linux/PALSVDF.h90
-rw-r--r--compiler/luci-interpreter/pal/linux/pal.cmake30
-rw-r--r--compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst4
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALAveragePool2d.h73
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALConv2d.h43
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h91
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALDequantize.h44
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALFullyConnected.h61
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALMul.h18
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALQuantize.h44
-rw-r--r--compiler/luci-interpreter/pal/mcu/PALSVDF.h258
-rw-r--r--compiler/luci-interpreter/pal/mcu/pal.cmake4
-rw-r--r--compiler/luci-interpreter/src/CMakeLists.txt3
-rw-r--r--compiler/luci-interpreter/src/Interpreter.cpp27
-rw-r--r--compiler/luci-interpreter/src/core/CMakeLists.txt4
-rw-r--r--compiler/luci-interpreter/src/core/KernelParams.h25
-rw-r--r--compiler/luci-interpreter/src/import/CMakeLists.txt15
-rw-r--r--compiler/luci-interpreter/src/import/GraphBuilderRegistry.cpp33
-rw-r--r--compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp113
-rw-r--r--compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.h39
-rw-r--r--compiler/luci-interpreter/src/kernels/Add.cpp38
-rw-r--r--compiler/luci-interpreter/src/kernels/Add.h1
-rw-r--r--compiler/luci-interpreter/src/kernels/Add.test.cpp93
-rw-r--r--compiler/luci-interpreter/src/kernels/ArgMax.test.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/AveragePool2D.cpp21
-rw-r--r--compiler/luci-interpreter/src/kernels/AveragePool2D.h3
-rw-r--r--compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp29
-rw-r--r--compiler/luci-interpreter/src/kernels/BatchMatMul.cpp188
-rw-r--r--compiler/luci-interpreter/src/kernels/BatchMatMul.h49
-rw-r--r--compiler/luci-interpreter/src/kernels/BatchMatMul.test.cpp272
-rw-r--r--compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/CMakeLists.txt4
-rw-r--r--compiler/luci-interpreter/src/kernels/Cast.test.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/Concatenation.cpp18
-rw-r--r--compiler/luci-interpreter/src/kernels/Concatenation.test.cpp55
-rw-r--r--compiler/luci-interpreter/src/kernels/Conv2D.cpp73
-rw-r--r--compiler/luci-interpreter/src/kernels/Conv2D.h3
-rw-r--r--compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp28
-rw-r--r--compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h2
-rw-r--r--compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp50
-rw-r--r--compiler/luci-interpreter/src/kernels/Dequantize.cpp79
-rw-r--r--compiler/luci-interpreter/src/kernels/Dequantize.h43
-rw-r--r--compiler/luci-interpreter/src/kernels/Dequantize.test.cpp149
-rw-r--r--compiler/luci-interpreter/src/kernels/Div.cpp36
-rw-r--r--compiler/luci-interpreter/src/kernels/Div.h1
-rw-r--r--compiler/luci-interpreter/src/kernels/Div.test.cpp69
-rw-r--r--compiler/luci-interpreter/src/kernels/Equal.cpp29
-rw-r--r--compiler/luci-interpreter/src/kernels/Equal.h1
-rw-r--r--compiler/luci-interpreter/src/kernels/Equal.test.cpp106
-rw-r--r--compiler/luci-interpreter/src/kernels/ExpandDims.cpp88
-rw-r--r--compiler/luci-interpreter/src/kernels/ExpandDims.h44
-rw-r--r--compiler/luci-interpreter/src/kernels/ExpandDims.test.cpp115
-rw-r--r--compiler/luci-interpreter/src/kernels/FullyConnected.cpp18
-rw-r--r--compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/Gather.cpp139
-rw-r--r--compiler/luci-interpreter/src/kernels/Gather.h47
-rw-r--r--compiler/luci-interpreter/src/kernels/Gather.test.cpp137
-rw-r--r--compiler/luci-interpreter/src/kernels/Greater.cpp29
-rw-r--r--compiler/luci-interpreter/src/kernels/Greater.h1
-rw-r--r--compiler/luci-interpreter/src/kernels/Greater.test.cpp106
-rw-r--r--compiler/luci-interpreter/src/kernels/GreaterEqual.cpp29
-rw-r--r--compiler/luci-interpreter/src/kernels/GreaterEqual.h1
-rw-r--r--compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp105
-rw-r--r--compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp3
-rw-r--r--compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/Less.cpp29
-rw-r--r--compiler/luci-interpreter/src/kernels/Less.h1
-rw-r--r--compiler/luci-interpreter/src/kernels/Less.test.cpp106
-rw-r--r--compiler/luci-interpreter/src/kernels/LessEqual.cpp29
-rw-r--r--compiler/luci-interpreter/src/kernels/LessEqual.h1
-rw-r--r--compiler/luci-interpreter/src/kernels/LessEqual.test.cpp106
-rw-r--r--compiler/luci-interpreter/src/kernels/Logistic.test.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/MirrorPad.cpp118
-rw-r--r--compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp210
-rw-r--r--compiler/luci-interpreter/src/kernels/Mul.cpp37
-rw-r--r--compiler/luci-interpreter/src/kernels/Mul.h1
-rw-r--r--compiler/luci-interpreter/src/kernels/Mul.test.cpp126
-rw-r--r--compiler/luci-interpreter/src/kernels/NotEqual.cpp29
-rw-r--r--compiler/luci-interpreter/src/kernels/NotEqual.h1
-rw-r--r--compiler/luci-interpreter/src/kernels/NotEqual.test.cpp106
-rw-r--r--compiler/luci-interpreter/src/kernels/OneHot.cpp136
-rw-r--r--compiler/luci-interpreter/src/kernels/OneHot.h48
-rw-r--r--compiler/luci-interpreter/src/kernels/OneHot.test.cpp192
-rw-r--r--compiler/luci-interpreter/src/kernels/Pack.test.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/Pad.cpp10
-rw-r--r--compiler/luci-interpreter/src/kernels/Pad.test.cpp26
-rw-r--r--compiler/luci-interpreter/src/kernels/Quantize.cpp160
-rw-r--r--compiler/luci-interpreter/src/kernels/Quantize.h43
-rw-r--r--compiler/luci-interpreter/src/kernels/Quantize.test.cpp254
-rw-r--r--compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/SVDF.cpp241
-rw-r--r--compiler/luci-interpreter/src/kernels/SVDF.h56
-rw-r--r--compiler/luci-interpreter/src/kernels/SVDF.test.cpp341
-rw-r--r--compiler/luci-interpreter/src/kernels/Slice.cpp5
-rw-r--r--compiler/luci-interpreter/src/kernels/Slice.test.cpp4
-rw-r--r--compiler/luci-interpreter/src/kernels/Softmax.test.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/Split.test.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/SplitV.test.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/Squeeze.test.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/Sub.cpp36
-rw-r--r--compiler/luci-interpreter/src/kernels/Sub.h1
-rw-r--r--compiler/luci-interpreter/src/kernels/Sub.test.cpp75
-rw-r--r--compiler/luci-interpreter/src/kernels/Transpose.test.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/Unpack.test.cpp2
-rw-r--r--compiler/luci-interpreter/src/kernels/Utils.cpp22
-rw-r--r--compiler/luci-interpreter/src/kernels/Utils.h33
-rw-r--r--compiler/luci-interpreter/src/loader/CMakeLists.txt4
-rw-r--r--compiler/luci-interpreter/src/loader/GraphLoader.cpp94
-rw-r--r--compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp48
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp22
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp72
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp17
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp22
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp37
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/ExpandDims.cpp37
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp1
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Gather.cpp44
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/OneHot.cpp42
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/Quantize.cpp37
-rw-r--r--compiler/luci-interpreter/src/loader/nodes/SVDF.cpp93
149 files changed, 7681 insertions, 289 deletions
diff --git a/compiler/luci-interpreter/README.md b/compiler/luci-interpreter/README.md
index 4a9a34e6d..77ec5c81c 100644
--- a/compiler/luci-interpreter/README.md
+++ b/compiler/luci-interpreter/README.md
@@ -111,7 +111,7 @@ Note that one memory manager could be shared between multiple interpreter instan
List of predefined memory managers:
- `SimpleMemoryManager` This is a simple wrapper around new/delete, default one.
-- `TestMemoryManager` Memorizes all allocated memory and releases it in Manager desctuctor, used in kernel unit tests.
+- `TestMemoryManager` Memorizes all allocated memory and releases it in Manager destructor, used in kernel unit tests.
- `BuddyMemoryManager` Implements Buddy algorithm, uses external buffer for tensor data allocations, does not need new/delete.
- `StaticMemoryManger` Uses precomputed memory allocation plan. Requires preparation with MemoryPlanner, but could reduce memory consumption in restricted environments (like MCUs).
diff --git a/compiler/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h b/compiler/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h
new file mode 100644
index 000000000..375b1ae20
--- /dev/null
+++ b/compiler/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
+#define __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
+
+#include <luci/Import/GraphBuilderRegistry.h>
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Creates and returns a GraphBuilderSource that avoids copying constant buffers from the
+ * model's file.
+ *
+ * @warning Use this source only when the model's buffer stays alive longer than the Interpreter.
+ */
+std::unique_ptr<luci::GraphBuilderSource> source_without_constant_copying();
+
+} // namespace luci_interpreter
+
+#endif // __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
diff --git a/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h b/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h
index 7dee8a7f2..8e2f457a5 100644
--- a/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h
+++ b/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h
@@ -50,7 +50,9 @@ public:
class Interpreter
{
public:
- explicit Interpreter(const luci::Module *module, IMemoryManager *memory_manager = nullptr);
+ explicit Interpreter(const luci::Module *module);
+
+ explicit Interpreter(const luci::Module *module, IMemoryManager *memory_manager);
~Interpreter();
@@ -69,7 +71,6 @@ private:
// the order of deletion in the destructor
std::unique_ptr<IMemoryManager> _default_memory_manager = nullptr;
std::unique_ptr<class RuntimeModule> _runtime_module;
- IMemoryManager *_memory_manager = nullptr;
// Observer functionality support.
std::unique_ptr<struct RuntimeToIR> _runtime_to_ir;
diff --git a/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst b/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
index 771974afe..d134a6b95 100644
--- a/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
+++ b/compiler/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
@@ -7,9 +7,11 @@ REGISTER_KERNEL(Concatenation)
REGISTER_KERNEL(Conv2D)
REGISTER_KERNEL(DepthToSpace)
REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Dequantize)
REGISTER_KERNEL(Div)
REGISTER_KERNEL(Elu)
REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(ExpandDims)
REGISTER_KERNEL(Floor)
REGISTER_KERNEL(FloorDiv)
REGISTER_KERNEL(Equal)
@@ -37,6 +39,7 @@ REGISTER_KERNEL(NotEqual)
REGISTER_KERNEL(Pad)
REGISTER_KERNEL(PadV2)
REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Quantize)
REGISTER_KERNEL(Reshape)
REGISTER_KERNEL(ResizeBilinear)
REGISTER_KERNEL(ResizeNearestNeighbor)
@@ -50,6 +53,7 @@ REGISTER_KERNEL(Square)
REGISTER_KERNEL(SquaredDifference)
REGISTER_KERNEL(Squeeze)
REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(SVDF)
REGISTER_KERNEL(Tanh)
REGISTER_KERNEL(Transpose)
REGISTER_KERNEL(TransposeConv)
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h b/compiler/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h
new file mode 100644
index 000000000..a274afb7e
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+#include <arm_nn_types.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+/**
+ * @brief Generic AveragePool placeholder for element types that have no implementation here.
+ *
+ * The operation is not supported yet: the body only asserts, so in builds where assert is
+ * compiled out the call does nothing and output_data is left untouched.
+ */
+template <typename T>
+static inline void AveragePool(const tflite::PoolParams &params,
+                               const tflite::RuntimeShape &input_shape, const T *input_data,
+                               const tflite::RuntimeShape &output_shape, T *output_data,
+                               const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data)
+{
+  // None of the arguments are consumed; the casts keep unused-parameter warnings quiet.
+  (void)params;
+  (void)input_shape;
+  (void)input_data;
+  (void)output_shape;
+  (void)output_data;
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
+
+  // MARK: At this moment this operation is not supported
+  assert(false && "AveragePool NYI");
+}
+
+/**
+ * @brief int8 AveragePool implemented with CMSIS-NN's arm_avgpool_s8.
+ *
+ * Preconditions (checked by assert only): input and output shapes have 4 dimensions, the
+ * batch dimension is 1 and scratchpad_data is not null.
+ *
+ * @param params           strides, padding, filter size and quantized activation range
+ * @param input_shape      shape of input_data
+ * @param input_data       quantized input values
+ * @param output_shape     shape of output_data
+ * @param output_data      destination buffer
+ * @param scratchpad_shape shape of the scratchpad; its first dimension is passed as buffer size
+ * @param scratchpad_data  working buffer handed to CMSIS-NN
+ */
+template <>
+inline void AveragePool<int8_t>(const tflite::PoolParams &params,
+                                const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+                                const tflite::RuntimeShape &output_shape, int8_t *output_data,
+                                const tflite::RuntimeShape &scratchpad_shape,
+                                int8_t *scratchpad_data)
+{
+  assert(input_shape.DimensionsCount() == 4);
+  assert(output_shape.DimensionsCount() == 4);
+  assert(scratchpad_data != nullptr);
+
+  const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+  assert(batches == 1);
+  // Only the assert reads `batches`; keep builds with NDEBUG free of unused-variable warnings.
+  (void)batches;
+
+  const int depth = tflite::MatchingDim(input_shape, 3, output_shape, 3);
+
+  cmsis_nn_dims input_dims;
+  input_dims.n = 1;
+  input_dims.h = input_shape.Dims(1);
+  input_dims.w = input_shape.Dims(2);
+  input_dims.c = depth;
+
+  cmsis_nn_dims output_dims;
+  output_dims.n = 1;
+  output_dims.h = output_shape.Dims(1);
+  output_dims.w = output_shape.Dims(2);
+  output_dims.c = depth;
+
+  cmsis_nn_pool_params pool_params;
+  pool_params.stride.h = params.stride_height;
+  pool_params.stride.w = params.stride_width;
+  pool_params.padding.h = params.padding_values.height;
+  pool_params.padding.w = params.padding_values.width;
+  pool_params.activation.min = params.quantized_activation_min;
+  pool_params.activation.max = params.quantized_activation_max;
+
+  cmsis_nn_dims filter_dims;
+  filter_dims.n = 1;
+  filter_dims.h = params.filter_height;
+  filter_dims.w = params.filter_width;
+  filter_dims.c = 1;
+
+  cmsis_nn_context ctx;
+  ctx.buf = scratchpad_data;
+  ctx.size = scratchpad_shape.Dims(0);
+  auto res = arm_avgpool_s8(&ctx, &pool_params, &input_dims, input_data, &filter_dims, &output_dims,
+                            output_data);
+  assert(res == ARM_MATH_SUCCESS);
+  // The status is only inspected by the assert above.
+  (void)res;
+}
+
+/**
+ * @brief Prepares the scratchpad tensor for AveragePool.
+ *
+ * For S8 input the scratchpad is resized to the size reported by
+ * arm_avgpool_s8_get_buffer_size multiplied by the element size of the input type; for any
+ * other input type the scratchpad is marked as not allocatable.
+ */
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+                                         const luci_interpreter::DataType &input_data_type,
+                                         const tflite::RuntimeShape &input_shape,
+                                         const tflite::RuntimeShape &output_shape)
+{
+  if (input_data_type != luci_interpreter::DataType::S8)
+  {
+    scratchpad->set_allocatable(false);
+    return;
+  }
+
+  assert(input_shape.DimensionsCount() == 4);
+  assert(output_shape.DimensionsCount() == 4);
+
+  const int32_t out_w = output_shape.Dims(2);
+  const int32_t channels = tflite::MatchingDim(input_shape, 3, output_shape, 3);
+
+  const int32_t required = arm_avgpool_s8_get_buffer_size(out_w, channels);
+  const auto element_size =
+    static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type));
+
+  scratchpad->resize(luci_interpreter::Shape{required * element_size});
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h b/compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h
index 0a8ae4e48..cfb84ea60 100644
--- a/compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALConv2d.h
@@ -19,6 +19,8 @@
#include <tensorflow/lite/kernels/internal/reference/conv.h>
#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
+#include <arm_nn_types.h>
+#include <arm_nnfunctions.h>
namespace luci_interpreter_pal
{
@@ -26,11 +28,11 @@ static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeS
const float *input_data, const tflite::RuntimeShape &filter_shape,
const float *filter_data, const tflite::RuntimeShape &bias_shape,
const float *bias_data, const tflite::RuntimeShape &output_shape,
- float *output_data, const tflite::RuntimeShape &im2col_shape,
- float *im2col_data)
+ float *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ float *scratchpad_data)
{
- (void)im2col_shape;
- (void)im2col_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
bias_shape, bias_data, output_shape, output_data,
tflite::RuntimeShape(), nullptr);
@@ -40,14 +42,14 @@ static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeS
const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
const int32 *bias_data, const tflite::RuntimeShape &output_shape,
- uint8 *output_data, const tflite::RuntimeShape &im2col_shape,
- uint8 *im2col_data)
+ uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ uint8 *scratchpad_data)
{
- (void)im2col_shape;
- (void)im2col_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
- bias_shape, bias_data, output_shape, output_data, im2col_shape,
- im2col_data, nullptr);
+ bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
+ scratchpad_data, nullptr);
}
static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
@@ -55,14 +57,141 @@ static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_
const int8 *input_data, const tflite::RuntimeShape &filter_shape,
const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
const int32 *bias_data, const tflite::RuntimeShape &output_shape,
- int8 *output_data, const tflite::RuntimeShape &im2col_shape,
- int8 *im2col_data)
+ int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ int8 *scratchpad_data)
{
- (void)im2col_shape;
- (void)im2col_data;
- tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
- filter_shape, filter_data, bias_shape, bias_data,
- output_shape, output_data);
+ if (scratchpad_data)
+ {
+ cmsis_nn_conv_params conv_params;
+ conv_params.dilation.h = params.dilation_height_factor;
+ conv_params.dilation.w = params.dilation_width_factor;
+
+ assert(conv_params.dilation.h == 1);
+ assert(conv_params.dilation.w == 1);
+
+ conv_params.input_offset = params.input_offset;
+ conv_params.output_offset = params.output_offset;
+ conv_params.stride.h = params.stride_height;
+ conv_params.stride.w = params.stride_width;
+ conv_params.padding.h = params.padding_values.height;
+ conv_params.padding.w = params.padding_values.width;
+ conv_params.activation.min = params.quantized_activation_min;
+ conv_params.activation.max = params.quantized_activation_max;
+
+ cmsis_nn_per_channel_quant_params quant_params;
+ quant_params.multiplier = const_cast<int32_t *>(mult);
+ quant_params.shift = const_cast<int32_t *>(shifts);
+
+ assert(conv_params.activation.min <= conv_params.activation.max);
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+ const int output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3);
+ if (bias_data)
+ {
+ assert(bias_shape.FlatSize() == output_depth);
+ }
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batch_size;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = input_depth;
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = output_depth;
+ filter_dims.h = filter_shape.Dims(1);
+ filter_dims.w = filter_shape.Dims(2);
+ filter_dims.c = input_depth;
+
+ cmsis_nn_dims bias_dims;
+ bias_dims.n = 1;
+ bias_dims.h = 1;
+ bias_dims.w = 1;
+ bias_dims.c = output_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batch_size;
+ output_dims.h = output_shape.Dims(1);
+ output_dims.w = output_shape.Dims(2);
+ output_dims.c = output_depth;
+
+ cmsis_nn_context ctx;
+ ctx.buf = scratchpad_data;
+ ctx.size = scratchpad_shape.Dims(0);
+
+ auto res = arm_convolve_wrapper_s8(&ctx, &conv_params, &quant_params, &input_dims, input_data,
+ &filter_dims, filter_data, &bias_dims, bias_data,
+ &output_dims, output_data);
+ assert(res == ARM_MATH_SUCCESS);
+ }
+ else
+ {
+ tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
+ filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data);
+ }
+}
+
+/**
+ * @brief Prepares the scratchpad tensor for Conv2D.
+ *
+ * For S8 input with dilation 1x1 the scratchpad is resized to the size reported by
+ * arm_convolve_wrapper_s8_get_buffer_size; in every other case it is marked as not
+ * allocatable.
+ */
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+                                         const luci_interpreter::DataType &input_data_type,
+                                         const tflite::ConvParams &params,
+                                         const tflite::RuntimeShape &input_shape,
+                                         const tflite::RuntimeShape &filter_shape,
+                                         const tflite::RuntimeShape &output_shape)
+{
+  cmsis_nn_conv_params conv_params;
+  conv_params.dilation.h = params.dilation_height_factor;
+  conv_params.dilation.w = params.dilation_width_factor;
+
+  const bool no_dilation = conv_params.dilation.h == 1 && conv_params.dilation.w == 1;
+  if (input_data_type != loco::DataType::S8 || !no_dilation)
+  {
+    scratchpad->set_allocatable(false);
+    return;
+  }
+
+  const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+  const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+  const int32_t output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3);
+
+  conv_params.input_offset = params.input_offset;
+  conv_params.output_offset = params.output_offset;
+  conv_params.stride.h = params.stride_height;
+  conv_params.stride.w = params.stride_width;
+  conv_params.padding.h = params.padding_values.height;
+  conv_params.padding.w = params.padding_values.width;
+
+  cmsis_nn_dims input_dims;
+  input_dims.n = batches;
+  input_dims.h = input_shape.Dims(1);
+  input_dims.w = input_shape.Dims(2);
+  input_dims.c = input_depth;
+
+  cmsis_nn_dims filter_dims;
+  filter_dims.n = output_depth;
+  filter_dims.h = filter_shape.Dims(1);
+  filter_dims.w = filter_shape.Dims(2);
+  filter_dims.c = input_depth;
+
+  cmsis_nn_dims output_dims;
+  output_dims.n = batches;
+  output_dims.h = output_shape.Dims(1);
+  output_dims.w = output_shape.Dims(2);
+  output_dims.c = output_depth;
+
+  const int32_t required = arm_convolve_wrapper_s8_get_buffer_size(&conv_params, &input_dims,
+                                                                   &filter_dims, &output_dims);
+
+  scratchpad->resize(luci_interpreter::Shape{required});
+}
} // namespace luci_interpreter_pal
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h b/compiler/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h
new file mode 100644
index 000000000..120dcd803
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+/**
+ * @brief Generic DepthwiseConvPerChannel placeholder for element types that have no
+ *        implementation here.
+ *
+ * The operation is not supported yet: the body only asserts, so in builds where assert is
+ * compiled out the call does nothing and output_data is left untouched.
+ */
+template <typename T>
+static inline void
+DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+                        const int32_t *output_shift, const tflite::RuntimeShape &input_shape,
+                        const T *input_data, const tflite::RuntimeShape &filter_shape,
+                        const T *filter_data, const tflite::RuntimeShape &bias_shape,
+                        const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
+                        T *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                        T *scratchpad_data)
+{
+  // None of the arguments are consumed; the casts keep unused-parameter warnings quiet.
+  (void)params;
+  (void)output_multiplier;
+  (void)output_shift;
+  (void)input_shape;
+  (void)input_data;
+  (void)filter_shape;
+  (void)filter_data;
+  (void)bias_shape;
+  (void)bias_data;
+  (void)output_shape;
+  (void)output_data;
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
+
+  // MARK: At this moment this operation is not supported
+  assert(false && "DepthwiseConvPerChannel NYI");
+}
+
+/**
+ * @brief int8 per-channel depthwise convolution.
+ *
+ * When a scratchpad buffer is supplied the work is delegated to CMSIS-NN's
+ * arm_depthwise_conv_wrapper_s8 (dilation must be 1x1, checked by assert); without a
+ * scratchpad the TFLite reference integer kernel is used instead.
+ *
+ * @param params            strides, padding, dilation, offsets, depth multiplier and
+ *                          quantized activation range
+ * @param output_multiplier per-channel requantization multipliers
+ * @param output_shift      per-channel requantization shifts
+ * @param input_shape       shape of input_data
+ * @param input_data        quantized input values
+ * @param filter_shape      shape of filter_data
+ * @param filter_data       quantized filter values
+ * @param bias_shape        shape of bias_data
+ * @param bias_data         bias values, may be null
+ * @param output_shape      shape of output_data
+ * @param output_data       destination buffer
+ * @param scratchpad_shape  shape of the scratchpad; its first dimension is passed as buffer size
+ * @param scratchpad_data   working buffer for CMSIS-NN; null selects the reference kernel
+ */
+template <>
+inline void DepthwiseConvPerChannel<int8_t>(
+  const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+  const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+  const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+  const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+  const tflite::RuntimeShape &output_shape, int8_t *output_data,
+  const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
+{
+  if (scratchpad_data)
+  {
+    cmsis_nn_dw_conv_params dw_conv_params;
+    dw_conv_params.dilation.h = params.dilation_height_factor;
+    dw_conv_params.dilation.w = params.dilation_width_factor;
+    assert(dw_conv_params.dilation.h == 1);
+    assert(dw_conv_params.dilation.w == 1);
+
+    dw_conv_params.input_offset = params.input_offset;
+    dw_conv_params.output_offset = params.output_offset;
+    dw_conv_params.stride.h = params.stride_height;
+    dw_conv_params.stride.w = params.stride_width;
+    dw_conv_params.padding.h = params.padding_values.height;
+    dw_conv_params.padding.w = params.padding_values.width;
+
+    dw_conv_params.activation.min = params.quantized_activation_min;
+    dw_conv_params.activation.max = params.quantized_activation_max;
+    dw_conv_params.ch_mult = params.depth_multiplier;
+
+    // Hand CMSIS-NN the caller's per-channel arrays (the same ones the reference kernel below
+    // receives). Pointing these at a single scalar taken from `params` would make the kernel
+    // read past that scalar for every channel after the first. The struct members are
+    // non-const pointers, hence the const_cast; the arrays are not written through it here.
+    cmsis_nn_per_channel_quant_params quant_params;
+    quant_params.multiplier = const_cast<int32_t *>(output_multiplier);
+    quant_params.shift = const_cast<int32_t *>(output_shift);
+
+    assert(dw_conv_params.activation.min <= dw_conv_params.activation.max);
+    const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+    const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3);
+    if (bias_data)
+    {
+      assert(bias_shape.FlatSize() == output_depth);
+    }
+
+    cmsis_nn_dims input_dims;
+    input_dims.n = batch_size;
+    input_dims.h = input_shape.Dims(1);
+    input_dims.w = input_shape.Dims(2);
+    input_dims.c = input_shape.Dims(3);
+
+    cmsis_nn_dims filter_dims;
+    filter_dims.n = filter_shape.Dims(0);
+    filter_dims.h = filter_shape.Dims(1);
+    filter_dims.w = filter_shape.Dims(2);
+    filter_dims.c = output_depth;
+
+    cmsis_nn_dims bias_dims;
+    bias_dims.n = 1;
+    bias_dims.h = 1;
+    bias_dims.w = 1;
+    bias_dims.c = output_depth;
+
+    cmsis_nn_dims output_dims;
+    output_dims.n = batch_size;
+    output_dims.h = output_shape.Dims(1);
+    output_dims.w = output_shape.Dims(2);
+    output_dims.c = output_depth;
+
+    cmsis_nn_context ctx;
+    ctx.buf = scratchpad_data;
+    ctx.size = scratchpad_shape.Dims(0);
+
+    auto res = arm_depthwise_conv_wrapper_s8(&ctx, &dw_conv_params, &quant_params, &input_dims,
+                                             input_data, &filter_dims, filter_data, &bias_dims,
+                                             bias_data, &output_dims, output_data);
+    assert(res == ARM_MATH_SUCCESS);
+    // The status is only inspected by the assert above.
+    (void)res;
+  }
+  else
+  {
+    tflite::reference_integer_ops::DepthwiseConvPerChannel(
+      params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+      bias_shape, bias_data, output_shape, output_data);
+  }
+}
+
+/**
+ * @brief Prepares the scratchpad tensor for DepthwiseConv2D.
+ *
+ * For S8 input with dilation 1x1 the scratchpad is resized to the size reported by
+ * arm_depthwise_conv_wrapper_s8_get_buffer_size multiplied by the element size of the input
+ * type; in every other case it is marked as not allocatable.
+ *
+ * @param scratchpad      tensor to resize or to mark as not allocatable
+ * @param params          depthwise convolution parameters
+ * @param input_data_type element type of the input tensor
+ * @param input_shape     shape of the input
+ * @param filter_shape    shape of the filter
+ * @param output_shape    shape of the output
+ */
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+                                         const tflite::DepthwiseParams &params,
+                                         const luci_interpreter::DataType &input_data_type,
+                                         const tflite::RuntimeShape &input_shape,
+                                         const tflite::RuntimeShape &filter_shape,
+                                         const tflite::RuntimeShape &output_shape)
+{
+  // Value-initialize so no field is handed to CMSIS-NN with an indeterminate value.
+  cmsis_nn_dw_conv_params dw_conv_params{};
+  dw_conv_params.dilation.h = params.dilation_height_factor;
+  dw_conv_params.dilation.w = params.dilation_width_factor;
+
+  if (input_data_type == loco::DataType::S8 && dw_conv_params.dilation.h == 1 &&
+      dw_conv_params.dilation.w == 1)
+  {
+    const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+    const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3);
+
+    // Describe the same convolution the kernel will run, so that the size query sees the
+    // same channel multiplier, offsets, strides and padding.
+    // NOTE(review): which of these fields the size query reads depends on the CMSIS-NN
+    // version in use - confirm against the vendored headers.
+    dw_conv_params.ch_mult = params.depth_multiplier;
+    dw_conv_params.input_offset = params.input_offset;
+    dw_conv_params.output_offset = params.output_offset;
+    dw_conv_params.stride.h = params.stride_height;
+    dw_conv_params.stride.w = params.stride_width;
+    dw_conv_params.padding.h = params.padding_values.height;
+    dw_conv_params.padding.w = params.padding_values.width;
+
+    cmsis_nn_dims input_dims;
+    input_dims.n = batch_size;
+    input_dims.h = input_shape.Dims(1);
+    input_dims.w = input_shape.Dims(2);
+    input_dims.c = input_shape.Dims(3);
+
+    cmsis_nn_dims filter_dims;
+    filter_dims.n = filter_shape.Dims(0);
+    filter_dims.h = filter_shape.Dims(1);
+    filter_dims.w = filter_shape.Dims(2);
+    filter_dims.c = output_depth;
+
+    cmsis_nn_dims output_dims;
+    output_dims.n = batch_size;
+    output_dims.h = output_shape.Dims(1);
+    output_dims.w = output_shape.Dims(2);
+    output_dims.c = output_depth;
+
+    const int32_t buf_size = arm_depthwise_conv_wrapper_s8_get_buffer_size(
+      &dw_conv_params, &input_dims, &filter_dims, &output_dims);
+
+    auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type));
+
+    luci_interpreter::Shape scratchpad_shape{buf_size * data_type_size};
+    scratchpad->resize(scratchpad_shape);
+  }
+  else
+  {
+    scratchpad->set_allocatable(false);
+  }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h b/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h
new file mode 100644
index 000000000..15ff0327b
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALDequantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter_pal
+{
+
+// Converts quantized `input_data` of element type T to float `output_data` by forwarding all
+// arguments to tflite::reference_integer_ops::Dequantize<T>.
+template <typename T>
+static inline void Dequantize(tflite::DequantizationParams &params,
+                              const tflite::RuntimeShape &input_shape, const T *input_data,
+                              const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  namespace integer_ops = tflite::reference_integer_ops;
+
+  integer_ops::Dequantize<T>(params, input_shape, input_data, output_shape, output_data);
+}
+
+// Overload for uint8_t input: forwards all arguments to tflite::reference_ops::Dequantize
+// instead of the integer_ops kernel used by the template version.
+static inline void Dequantize(tflite::DequantizationParams &params,
+                              const tflite::RuntimeShape &input_shape, const uint8_t *input_data,
+                              const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  namespace ref_ops = tflite::reference_ops;
+
+  ref_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALFullyConnected.h b/compiler/luci-interpreter/pal/cmsisnn/PALFullyConnected.h
new file mode 100644
index 000000000..32e905761
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALFullyConnected.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+
+#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+// Primary template: placeholder for element types without an implementation in this header.
+// It ignores every argument and trips an assertion when assertions are enabled.
+template <typename T>
+static inline void FullyConnected(const tflite::FullyConnectedParams &params,
+                                  const tflite::RuntimeShape &input_shape, const T *input_data,
+                                  const tflite::RuntimeShape &filter_shape, const T *filter_data,
+                                  const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+                                  const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  // Every argument is intentionally unused; reference each one to avoid compiler warnings.
+  (void)output_data;
+  (void)output_shape;
+  (void)bias_data;
+  (void)bias_shape;
+  (void)filter_data;
+  (void)filter_shape;
+  (void)input_data;
+  (void)input_shape;
+  (void)params;
+
+  // MARK: This operation is not supported for this element type yet.
+  assert(false && "FullyConnected NYI");
+}
+
+// int8_t specialization that runs the layer through CMSIS-NN's arm_fully_connected_s8().
+//
+// The batch count and output depth are read from the (2-D) output shape, and the accumulation
+// depth from the last dimension of the filter shape. A temporary working buffer of the size
+// reported by arm_fully_connected_s8_get_buffer_size() is allocated for the duration of the call.
+// The kernel status is only checked through assert().
+template <>
+inline void
+FullyConnected<int8_t>(const tflite::FullyConnectedParams &params,
+                       const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+                       const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+                       const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+                       const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+  // These shapes are not consulted here; reference them to avoid unused-parameter warnings.
+  (void)input_shape;
+  (void)bias_shape;
+
+  assert(output_shape.DimensionsCount() == 2);
+
+  const int batches = output_shape.Dims(0);
+  const int output_depth = output_shape.Dims(1);
+
+  const int filter_dim_count = filter_shape.DimensionsCount();
+  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
+
+  cmsis_nn_fc_params fc_params;
+  fc_params.input_offset = params.input_offset;
+  fc_params.output_offset = params.output_offset;
+  fc_params.filter_offset = params.weights_offset;
+  fc_params.activation.min = params.quantized_activation_min;
+  fc_params.activation.max = params.quantized_activation_max;
+
+  cmsis_nn_per_tensor_quant_params quant_params;
+  quant_params.multiplier = params.output_multiplier;
+  quant_params.shift = params.output_shift;
+
+  cmsis_nn_dims input_dims;
+  input_dims.n = batches;
+  input_dims.h = 1;
+  input_dims.w = 1;
+  input_dims.c = accum_depth;
+
+  cmsis_nn_dims filter_dims;
+  filter_dims.n = accum_depth;
+  filter_dims.h = 1;
+  filter_dims.w = 1;
+  filter_dims.c = output_depth;
+
+  cmsis_nn_dims bias_dims;
+  bias_dims.n = 1;
+  bias_dims.h = 1;
+  bias_dims.w = 1;
+  bias_dims.c = output_depth;
+
+  cmsis_nn_dims output_dims;
+  output_dims.n = batches;
+  output_dims.h = 1;
+  output_dims.w = 1;
+  output_dims.c = output_depth;
+
+  const int32_t buf_size = arm_fully_connected_s8_get_buffer_size(&filter_dims);
+  // std::make_unique reports allocation failure by throwing, so no null check is needed.
+  auto buffer = std::make_unique<int8_t[]>(buf_size);
+
+  cmsis_nn_context ctx;
+  ctx.buf = buffer.get();
+  ctx.size = buf_size;
+
+  const auto res =
+    arm_fully_connected_s8(&ctx, &fc_params, &quant_params, &input_dims, input_data, &filter_dims,
+                           filter_data, &bias_dims, bias_data, &output_dims, output_data);
+  assert(res == ARM_MATH_SUCCESS);
+  // Keep `res` referenced when assert() is compiled out (NDEBUG builds).
+  (void)res;
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALMul.h b/compiler/luci-interpreter/pal/cmsisnn/PALMul.h
index 2b46b100c..347a97a83 100644
--- a/compiler/luci-interpreter/pal/cmsisnn/PALMul.h
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALMul.h
@@ -21,21 +21,21 @@
namespace luci_interpreter_pal
{
+template <typename T>
static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
- const float *input1_data, const tflite::RuntimeShape &input2_shape,
- const float *input2_data, const tflite::RuntimeShape &output_shape,
- float *output_data)
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
{
tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data);
}
-static inline void BroadcastMul4DSlow(tflite::ArithmeticParams &params,
- const tflite::RuntimeShape &input1_shape,
- const float *input1_data,
- const tflite::RuntimeShape &input2_shape,
- const float *input2_data,
- const tflite::RuntimeShape &output_shape, float *output_data)
+template <typename T>
+static inline void
+BroadcastMul4DSlow(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data)
{
tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data);
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h b/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h
new file mode 100644
index 000000000..6046789ae
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALQuantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
+#define LUCI_INTERPRETER_PAL_QUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter_pal
+{
+// Quantizes float `input_data` into `output_data` of element type T by forwarding all arguments
+// to tflite::reference_ops::AffineQuantize.
+template <typename T>
+static inline void Quantize(tflite::QuantizationParams &params,
+                            const tflite::RuntimeShape &input_shape, const float *input_data,
+                            const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  namespace ref_ops = tflite::reference_ops;
+
+  ref_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+// Requantizes `size` elements from `input_data` into `output_data` by forwarding all arguments,
+// in the same order, to tflite::reference_ops::Requantize.
+template <typename Input, typename Output>
+static inline void Requantize(const Input *input_data, int32_t size,
+                              int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+                              int32_t input_zero_point, int32_t output_zero_point,
+                              Output *output_data)
+{
+  namespace ref_ops = tflite::reference_ops;
+
+  ref_ops::Requantize(input_data, size, effective_scale_multiplier, effective_scale_shift,
+                      input_zero_point, output_zero_point, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALSVDF.h b/compiler/luci-interpreter/pal/cmsisnn/PALSVDF.h
new file mode 100644
index 000000000..a4a5b2a78
--- /dev/null
+++ b/compiler/luci-interpreter/pal/cmsisnn/PALSVDF.h
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <arm_nn_types.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+// Runs the fully-integer SVDF through CMSIS-NN's arm_svdf_s8().
+//
+// Shapes are translated into cmsis_nn_dims (only the `n`/`h` members are populated),
+// `scale_1_a`/`scale_1_b` and `scale_2_a`/`scale_2_b` become the multiplier/shift pairs of the
+// input and output quantization parameters, and `scratchpad_data` / `output_temp_data` are passed
+// as the two scratch contexts. Any value returned by arm_svdf_s8() is discarded.
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+            const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+            const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+            const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+            const int32_t *bias_data, int16_t *activation_state_data,
+            const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+            int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+            int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+  const int32_t batch_size = input_shape.Dims(0);
+  const int32_t num_filters = weight_feature_shape.Dims(0);
+  const int32_t memory_size = weight_time_shape.Dims(1);
+
+  // All CMSIS-NN structs are value-initialized so that members which are not assigned below
+  // are zero instead of indeterminate when handed to the library.
+  cmsis_nn_dims input_dims{};
+  input_dims.n = input_shape.Dims(0);
+  input_dims.h = input_shape.Dims(1);
+
+  cmsis_nn_dims weights_feature_dims{};
+  weights_feature_dims.n = weight_feature_shape.Dims(0);
+  weights_feature_dims.h = weight_feature_shape.Dims(1);
+
+  cmsis_nn_dims weights_time_dims{};
+  weights_time_dims.n = weight_time_shape.Dims(0);
+  weights_time_dims.h = weight_time_shape.Dims(1);
+
+  cmsis_nn_dims bias_dims{};
+  bias_dims.n = bias_shape.Dims(0);
+
+  cmsis_nn_dims state_dims{};
+  state_dims.n = batch_size;
+  state_dims.h = memory_size * num_filters;
+
+  cmsis_nn_dims output_dims{};
+  output_dims.n = output_shape.Dims(0);
+  output_dims.h = output_shape.Dims(1);
+
+  cmsis_nn_svdf_params svdf_params{};
+  svdf_params.rank = params.rank;
+  svdf_params.input_offset = input_zp;
+  svdf_params.output_offset = output_zp;
+
+  // The intermediate state is clamped to the int16 range, the final output to the int8 range.
+  svdf_params.input_activation.min = INT16_MIN;
+  svdf_params.input_activation.max = INT16_MAX;
+
+  svdf_params.output_activation.min = INT8_MIN;
+  svdf_params.output_activation.max = INT8_MAX;
+
+  cmsis_nn_per_tensor_quant_params in_quant_params{};
+  in_quant_params.multiplier = scale_1_a;
+  in_quant_params.shift = scale_1_b;
+
+  cmsis_nn_per_tensor_quant_params out_quant_params{};
+  out_quant_params.multiplier = scale_2_a;
+  out_quant_params.shift = scale_2_b;
+
+  // Only the buffer pointers are known here; the context sizes stay zero.
+  cmsis_nn_context scratch_ctx{};
+  scratch_ctx.buf = scratchpad_data;
+
+  cmsis_nn_context scratch_output_ctx{};
+  scratch_output_ctx.buf = output_temp_data;
+
+  arm_svdf_s8(&scratch_ctx, &scratch_output_ctx, &svdf_params, &in_quant_params, &out_quant_params,
+              &input_dims, input_data, &state_dims, activation_state_data, &weights_feature_dims,
+              weight_feature_data, &weights_time_dims, weight_time_data, &bias_dims, bias_data,
+              &output_dims, output_data);
+}
+// Float SVDF evaluated without CMSIS-NN.
+//
+// Steps:
+//  1. Shift the whole activation state one element to the left.
+//  2. For every batch and filter, store the dot product of the filter's feature weights with the
+//     batch's input row into the state, starting at index `memory_size - 1` and advancing by
+//     `memory_size` per result.
+//  3. Delegate the remaining work to tflite::reference_ops::ApplyTimeWeightsBiasAndActivation.
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+          const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+          const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+          const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+          const float *bias_data, float *scratchpad_data, float *activation_state_data,
+          const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  const int32_t rank = params.rank;
+  const int32_t batch_size = input_shape.Dims(0);
+  const int32_t input_size = input_shape.Dims(1);
+  const int32_t num_filters = weight_feature_shape.Dims(0);
+  const int32_t num_units = num_filters / rank;
+  const int32_t memory_size = weight_time_shape.Dims(1);
+
+  // Left shift the activation_state.
+  // A bounded index loop is used so that an empty state (total size 0) is a no-op instead of
+  // running past the end of the buffer.
+  {
+    const int32_t state_size = batch_size * num_filters * memory_size;
+    for (int32_t i = 1; i < state_size; ++i)
+    {
+      activation_state_data[i - 1] = activation_state_data[i];
+    }
+  }
+
+  // Note: no need to clear the latest activation, matmul is not accumulative.
+
+  // Compute conv1d(inputs, weights_feature).
+  // The activation_state's rightmost column is used to save current cycle
+  // activation. This is achieved by starting at state_ptr[memory_size - 1] and
+  // having the stride equal to memory_size.
+
+  // Perform batched matrix vector multiply operation:
+  {
+    float *result_in_batch = &activation_state_data[memory_size - 1];
+    for (int i = 0; i < batch_size; ++i)
+    {
+      const float *input_row = input_data + i * input_size;
+      for (int j = 0; j < num_filters; ++j)
+      {
+        // The same weight matrix is reused for every batch.
+        const float *weights_row = weight_feature_data + j * input_size;
+        float dot_prod = 0.0f;
+        for (int k = 0; k < input_size; ++k)
+        {
+          dot_prod += weights_row[k] * input_row[k];
+        }
+        *result_in_batch = dot_prod;
+        result_in_batch += memory_size;
+      }
+    }
+  }
+
+  tflite::reference_ops::ApplyTimeWeightsBiasAndActivation(
+    batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data,
+    params.activation, activation_state_data, scratchpad_data, output_data);
+}
+
+// Sizes the SVDF scratchpad tensors for the cmsisnn backend.
+//
+// Throws std::runtime_error for the hybrid configuration (FLOAT32 input with S8 or U8 feature
+// weights). Otherwise scratchpad_1 is resized to {batch_size, num_filters}, and scratchpad_2 is
+// additionally resized to {batch_size, num_units} when the input type is S8. The remaining
+// scratchpads and the two shape arguments are not used.
+static inline void SetupScratchpadTensor(
+  const luci_interpreter::DataType &input_data_type,
+  const luci_interpreter::DataType &weight_feature_data_type,
+  luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+  luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+  luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+  const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+  const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+  // Unused by this backend; referenced only to avoid unused-parameter warnings.
+  (void)input_shape;
+  (void)weight_time_shape;
+  (void)scratchpad_3;
+  (void)scratchpad_4;
+  (void)scratchpad_5;
+  (void)scratchpad_6;
+
+  const bool float_input = input_data_type == loco::DataType::FLOAT32;
+  const bool quantized_weights = weight_feature_data_type == loco::DataType::S8 ||
+                                 weight_feature_data_type == loco::DataType::U8;
+  if (float_input && quantized_weights)
+  {
+    throw std::runtime_error("Hybrid type is not supported for cmsisnn");
+  }
+
+  // Needed for every supported configuration.
+  scratchpad_1->resize({batch_size, num_filters});
+
+  if (input_data_type != loco::DataType::S8)
+  {
+    return;
+  }
+
+  // Only the full-integer op uses the second scratchpad.
+  scratchpad_2->resize({batch_size, num_units});
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
diff --git a/compiler/luci-interpreter/pal/cmsisnn/pal.cmake b/compiler/luci-interpreter/pal/cmsisnn/pal.cmake
index 9a25a3c5d..a68b363d9 100644
--- a/compiler/luci-interpreter/pal/cmsisnn/pal.cmake
+++ b/compiler/luci-interpreter/pal/cmsisnn/pal.cmake
@@ -42,9 +42,12 @@ macro(add_pal_to_target TGT)
"${TensorFlowSource_DIR}")
target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
- set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
+ file(GLOB_RECURSE PAL_SOURCES "${CMSISSource_DIR}/CMSIS/NN/Source/*.c")
+ list(APPEND PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc)
add_library(luci_interpreter_cmsisnn_pal STATIC ${PAL_SOURCES})
- set_target_properties(luci_interpreter_cmsisnn_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
+ set_property(TARGET luci_interpreter_cmsisnn_pal PROPERTY POSITION_INDEPENDENT_CODE ON)
target_include_directories(luci_interpreter_cmsisnn_pal PRIVATE
"${TensorFlowRuySource_DIR}"
"${TensorFlowGEMMLowpSource_DIR}"
@@ -53,7 +56,7 @@ macro(add_pal_to_target TGT)
)
add_subdirectory(${CMSISSource_DIR}/CMSIS/NN ${CMAKE_CURRENT_BINARY_DIR}/CMSISNN)
- target_include_directories(luci_interpreter_cmsisnn_pal PRIVATE
+ target_include_directories(luci_interpreter_cmsisnn_pal PUBLIC
"${CMSISSource_DIR}/CMSIS/NN/Include"
"${CMSISSource_DIR}/CMSIS/DSP/Include"
"${CMSISSource_DIR}/CMSIS/Core/Include")
diff --git a/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst b/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst
index 9d541276c..428b15ee0 100644
--- a/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst
+++ b/compiler/luci-interpreter/pal/linux/KernelsToBuild.lst
@@ -1,19 +1,23 @@
REGISTER_KERNEL(Add)
REGISTER_KERNEL(ArgMax)
REGISTER_KERNEL(AveragePool2D)
+REGISTER_KERNEL(BatchMatMul)
REGISTER_KERNEL(BatchToSpaceND)
REGISTER_KERNEL(Cast)
REGISTER_KERNEL(Concatenation)
REGISTER_KERNEL(Conv2D)
REGISTER_KERNEL(DepthToSpace)
REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Dequantize)
REGISTER_KERNEL(Div)
REGISTER_KERNEL(Elu)
REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(ExpandDims)
REGISTER_KERNEL(Floor)
REGISTER_KERNEL(FloorDiv)
REGISTER_KERNEL(Equal)
REGISTER_KERNEL(FullyConnected)
+REGISTER_KERNEL(Gather)
REGISTER_KERNEL(Greater)
REGISTER_KERNEL(GreaterEqual)
REGISTER_KERNEL(If)
@@ -37,11 +41,13 @@ REGISTER_KERNEL(MirrorPad)
REGISTER_KERNEL(Mul)
REGISTER_KERNEL(Neg)
REGISTER_KERNEL(NotEqual)
+REGISTER_KERNEL(OneHot)
REGISTER_KERNEL(Pack)
REGISTER_KERNEL(Pad)
REGISTER_KERNEL(PadV2)
REGISTER_KERNEL(Pow)
REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Quantize)
REGISTER_KERNEL(Relu)
REGISTER_KERNEL(Relu6)
REGISTER_KERNEL(Reshape)
@@ -61,6 +67,7 @@ REGISTER_KERNEL(Square)
REGISTER_KERNEL(SquaredDifference)
REGISTER_KERNEL(Squeeze)
REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(SVDF)
REGISTER_KERNEL(Tanh)
REGISTER_KERNEL(Transpose)
REGISTER_KERNEL(TransposeConv)
diff --git a/compiler/luci-interpreter/pal/linux/PALAveragePool2d.h b/compiler/luci-interpreter/pal/linux/PALAveragePool2d.h
new file mode 100644
index 000000000..cce30601f
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALAveragePool2d.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+// Primary template: placeholder for element types without an implementation in this header.
+// It ignores every argument and trips an assertion when assertions are enabled.
+template <typename T>
+static inline void AveragePool(const tflite::PoolParams &params,
+                               const tflite::RuntimeShape &input_shape, const T *input_data,
+                               const tflite::RuntimeShape &output_shape, T *output_data,
+                               const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data)
+{
+  // Every argument is intentionally unused; reference each one to avoid compiler warnings.
+  (void)scratchpad_data;
+  (void)scratchpad_shape;
+  (void)output_data;
+  (void)output_shape;
+  (void)input_data;
+  (void)input_shape;
+  (void)params;
+
+  // MARK: This operation is not supported for this element type yet.
+  assert(false && "AveragePool NYI");
+}
+
+// int8_t specialization: forwards to tflite::reference_integer_ops::AveragePool.
+// The scratchpad arguments are accepted but not used.
+template <>
+inline void AveragePool<int8_t>(const tflite::PoolParams &params,
+                                const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+                                const tflite::RuntimeShape &output_shape, int8_t *output_data,
+                                const tflite::RuntimeShape &scratchpad_shape,
+                                int8_t *scratchpad_data)
+{
+  namespace integer_ops = tflite::reference_integer_ops;
+
+  // Referenced only to avoid unused-parameter warnings.
+  (void)scratchpad_data;
+  (void)scratchpad_shape;
+
+  integer_ops::AveragePool(params, input_shape, input_data, output_shape, output_data);
+}
+
+// Marks the average-pool scratchpad as not allocatable.
+// The data type and shape arguments are accepted but not used.
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+                                         const luci_interpreter::DataType &input_data_type,
+                                         const tflite::RuntimeShape &input_shape,
+                                         const tflite::RuntimeShape &output_shape)
+
+{
+  scratchpad->set_allocatable(false);
+
+  // Referenced only to avoid unused-parameter warnings.
+  (void)output_shape;
+  (void)input_shape;
+  (void)input_data_type;
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
diff --git a/compiler/luci-interpreter/pal/linux/PALBatchMatMul.h b/compiler/luci-interpreter/pal/linux/PALBatchMatMul.h
new file mode 100644
index 000000000..3894f2d92
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALBatchMatMul.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHMATMUL_H
+#define LUCI_INTERPRETER_PAL_BATCHMATMUL_H
+
+#include <tensorflow/lite/kernels/internal/reference/batch_matmul.h>
+
+namespace luci_interpreter_pal
+{
+// Float batch matrix multiplication: forwards all arguments, in the same order, to
+// tflite::reference_ops::BatchMatMul.
+inline void BatchMatMul(const tflite::RuntimeShape &lhs_shape, const float *lhs_data,
+                        const tflite::RuntimeShape &rhs_shape, const float *rhs_data,
+                        const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  namespace ref_ops = tflite::reference_ops;
+
+  ref_ops::BatchMatMul(lhs_shape, lhs_data, rhs_shape, rhs_data, output_shape, output_data);
+}
+
+// Resizes both scratchpads so each can hold the transpose of its operand: every scratchpad
+// gets the operand's shape with the last two dimensions swapped.
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *lhs_scratchpad,
+                                         luci_interpreter::Tensor *rhs_scratchpad,
+                                         const tflite::RuntimeShape &lhs_shape,
+                                         const tflite::RuntimeShape &rhs_shape)
+{
+  // Builds a copy of `shape` whose two innermost dimensions are exchanged.
+  const auto transposed_shape = [](const tflite::RuntimeShape &shape) {
+    const auto rank = shape.DimensionsCount();
+    luci_interpreter::Shape result(rank);
+    // Leading (batch) dimensions are carried over unchanged.
+    for (int i = 0; i < rank - 2; ++i)
+    {
+      result.dim(i) = shape.Dims(i);
+    }
+    result.dim(rank - 2) = shape.Dims(rank - 1);
+    result.dim(rank - 1) = shape.Dims(rank - 2);
+    return result;
+  };
+
+  // Scratchpad for transposed LHS
+  lhs_scratchpad->resize(transposed_shape(lhs_shape));
+  // Scratchpad for transposed RHS
+  rhs_scratchpad->resize(transposed_shape(rhs_shape));
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHMATMUL_H
diff --git a/compiler/luci-interpreter/pal/linux/PALConv2d.h b/compiler/luci-interpreter/pal/linux/PALConv2d.h
index 2550dd5d7..985a15f39 100644
--- a/compiler/luci-interpreter/pal/linux/PALConv2d.h
+++ b/compiler/luci-interpreter/pal/linux/PALConv2d.h
@@ -26,14 +26,24 @@ static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeS
const float *input_data, const tflite::RuntimeShape &filter_shape,
const float *filter_data, const tflite::RuntimeShape &bias_shape,
const float *bias_data, const tflite::RuntimeShape &output_shape,
- float *output_data, const tflite::RuntimeShape &im2col_shape,
- float *im2col_data)
+ float *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ float *scratchpad_data)
{
- if (im2col_data)
+ (void)scratchpad_shape;
+ if (scratchpad_data)
{
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+ const int32_t output_height = output_shape.Dims(1);
+ const int32_t output_width = output_shape.Dims(2);
+ const int32_t filter_height = filter_shape.Dims(1);
+ const int32_t filter_width = filter_shape.Dims(2);
+ tflite::RuntimeShape im2col_shape{batches, output_height, output_width,
+ input_depth * filter_height * filter_width};
+
tflite::optimized_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
bias_shape, bias_data, output_shape, output_data, im2col_shape,
- im2col_data);
+ scratchpad_data);
}
else
tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
@@ -45,8 +55,8 @@ static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeS
const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
const int32 *bias_data, const tflite::RuntimeShape &output_shape,
- uint8 *output_data, const tflite::RuntimeShape &im2col_shape,
- uint8 *im2col_data)
+ uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ uint8 *scratchpad_data)
{
// TODO This should only be done once (although it takes only a few microseconds).
// Also, the user should be able to adjust the number of threads.
@@ -54,8 +64,8 @@ static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeS
gemmlowp_context->set_max_num_threads(static_cast<int>(std::thread::hardware_concurrency()));
tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
- bias_shape, bias_data, output_shape, output_data, im2col_shape,
- im2col_data, gemmlowp_context.get());
+ bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
+ scratchpad_data, gemmlowp_context.get());
}
static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
@@ -63,17 +73,55 @@ static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_
const int8 *input_data, const tflite::RuntimeShape &filter_shape,
const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
const int32 *bias_data, const tflite::RuntimeShape &output_shape,
- int8 *output_data, const tflite::RuntimeShape &im2col_shape,
- int8 *im2col_data)
+ int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ int8 *scratchpad_data)
{
- (void)im2col_shape;
- (void)im2col_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
// TODO enable optimized version
tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
filter_shape, filter_data, bias_shape, bias_data,
output_shape, output_data);
}
+// Decides whether the convolution needs a scratchpad and sizes it accordingly.
+//
+// A scratchpad is requested when the input type is not S16 and the convolution is dilated,
+// strided, or uses a filter larger than 1x1. It is then resized to a 1-D shape of
+// batches * output_width * output_height * input_depth * filter_height * filter_width *
+// data_type_size elements; otherwise it is marked as not allocatable.
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+                                         const luci_interpreter::DataType &input_data_type,
+                                         const tflite::ConvParams &params,
+                                         const tflite::RuntimeShape &input_shape,
+                                         const tflite::RuntimeShape &filter_shape,
+                                         const tflite::RuntimeShape &output_shape)
+{
+  const int32_t filter_height = filter_shape.Dims(1);
+  const int32_t filter_width = filter_shape.Dims(2);
+
+  // The checks here should be aligned with the actual implementation.
+  const bool dilated = params.dilation_height_factor != 1 || params.dilation_width_factor != 1;
+  const bool strided_or_wide = params.stride_height != 1 || params.stride_width != 1 ||
+                               filter_height != 1 || filter_width != 1;
+  const bool is_s16 = input_data_type == luci_interpreter::DataType::S16;
+
+  if (is_s16 || !(dilated || strided_or_wide))
+  {
+    // No scratchpad is needed for this configuration.
+    scratchpad->set_allocatable(false);
+    return;
+  }
+
+  const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+  const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+  const int32_t output_height = output_shape.Dims(1);
+  const int32_t output_width = output_shape.Dims(2);
+
+  const auto data_type_size =
+    static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type));
+  const int32_t scratchpad_size = batches * output_width * output_height * input_depth *
+                                  filter_height * filter_width * data_type_size;
+
+  scratchpad->resize(luci_interpreter::Shape{scratchpad_size});
+}
+
} // namespace luci_interpreter_pal
#endif // LUCI_INTERPRETER_PAL_CONV2D_H
diff --git a/compiler/luci-interpreter/pal/linux/PALDepthwiseConv2d.h b/compiler/luci-interpreter/pal/linux/PALDepthwiseConv2d.h
new file mode 100644
index 000000000..c9d1a2948
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALDepthwiseConv2d.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+
+namespace luci_interpreter_pal
+{
+// Catch-all DepthwiseConvPerChannel for element types that have no dedicated specialization.
+// Not implemented yet: the assert fires in builds with assertions enabled (it expands to
+// nothing under NDEBUG), and none of the arguments is read or written.
+template <typename T>
+static inline void
+DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+                        const int32_t *output_shift, const tflite::RuntimeShape &input_shape,
+                        const T *input_data, const tflite::RuntimeShape &filter_shape,
+                        const T *filter_data, const tflite::RuntimeShape &bias_shape,
+                        const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
+                        T *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                        T *scratchpad_data)
+{
+  // MARK: At this moment this operation is not supported
+  assert(false && "DepthwiseConvPerChannel NYI");
+
+  // Silence unused-parameter warnings: one cast per parameter, in declaration order.
+  (void)params;
+  (void)output_multiplier;
+  (void)output_shift;
+  (void)input_shape;
+  (void)input_data;
+  (void)filter_shape;
+  (void)filter_data;
+  (void)bias_shape;
+  (void)bias_data;
+  (void)output_shape;
+  (void)output_data;
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
+}
+
+// int8_t specialization: forwards to the TFLite reference integer kernel.
+// The call takes no scratchpad, so scratchpad_shape and scratchpad_data are ignored.
+template <>
+inline void DepthwiseConvPerChannel<int8_t>(
+  const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+  const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+  const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+  const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+  const tflite::RuntimeShape &output_shape, int8_t *output_data,
+  const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
+{
+  namespace integer_ops = tflite::reference_integer_ops;
+
+  integer_ops::DepthwiseConvPerChannel(params, output_multiplier, output_shift, input_shape,
+                                       input_data, filter_shape, filter_data, bias_shape,
+                                       bias_data, output_shape, output_data);
+
+  // Unused on this path.
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
+}
+
+// Scratchpad setup for depthwise convolution: no scratchpad is requested here, so the
+// tensor is unconditionally marked as non-allocatable and every other argument is ignored.
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+                                         const tflite::DepthwiseParams &params,
+                                         const luci_interpreter::DataType &input_data_type,
+                                         const tflite::RuntimeShape &input_shape,
+                                         const tflite::RuntimeShape &filter_shape,
+                                         const tflite::RuntimeShape &output_shape)
+{
+  scratchpad->set_allocatable(false);
+
+  // The remaining parameters are not used; the casts suppress unused-parameter warnings.
+  (void)output_shape;
+  (void)filter_shape;
+  (void)input_shape;
+  (void)input_data_type;
+  (void)params;
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
diff --git a/compiler/luci-interpreter/pal/linux/PALDequantize.h b/compiler/luci-interpreter/pal/linux/PALDequantize.h
new file mode 100644
index 000000000..3af6d0777
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALDequantize.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+// Thin wrapper: hands all arguments, unchanged, to tflite::optimized_ops::Dequantize.
+template <typename T>
+static inline void Dequantize(tflite::DequantizationParams &params,
+                              const tflite::RuntimeShape &input_shape, const T *input_data,
+                              const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  namespace ops = tflite::optimized_ops;
+  ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
diff --git a/compiler/luci-interpreter/pal/linux/PALFullyConnected.h b/compiler/luci-interpreter/pal/linux/PALFullyConnected.h
new file mode 100644
index 000000000..62970dbf7
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALFullyConnected.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+
+#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
+
+namespace luci_interpreter_pal
+{
+// Catch-all FullyConnected for element types that have no dedicated specialization.
+// Not implemented: the assert fires in builds with assertions enabled, and no argument
+// is read or written.
+template <typename T>
+static inline void FullyConnected(const tflite::FullyConnectedParams &params,
+                                  const tflite::RuntimeShape &input_shape, const T *input_data,
+                                  const tflite::RuntimeShape &filter_shape, const T *filter_data,
+                                  const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+                                  const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  // MARK: At this moment this operation is not supported
+  assert(false && "FullyConnected NYI");
+
+  // Mark every parameter as deliberately unused.
+  (void)output_data;
+  (void)output_shape;
+  (void)bias_data;
+  (void)bias_shape;
+  (void)filter_data;
+  (void)filter_shape;
+  (void)input_data;
+  (void)input_shape;
+  (void)params;
+}
+
+// int8_t specialization: forwards every argument to the TFLite reference integer kernel.
+template <>
+inline void
+FullyConnected<int8_t>(const tflite::FullyConnectedParams &params,
+                       const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+                       const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+                       const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+                       const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+  namespace integer_ops = tflite::reference_integer_ops;
+
+  integer_ops::FullyConnected(params, input_shape, input_data, filter_shape, filter_data,
+                              bias_shape, bias_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
diff --git a/compiler/luci-interpreter/pal/linux/PALGather.h b/compiler/luci-interpreter/pal/linux/PALGather.h
new file mode 100644
index 000000000..49ac35f93
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALGather.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_GATHER_H
+#define LUCI_INTERPRETER_PAL_GATHER_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+// Thin wrapper over tflite::optimized_ops::Gather; all arguments are passed through unchanged.
+// T is the element type of the gathered data, CoordsT the type of the coordinate values.
+// CoordsT defaults to the standard int32_t rather than the legacy `int32` alias, matching the
+// fixed-width spellings used by the other PAL headers.
+template <typename T, typename CoordsT = int32_t>
+static inline void Gather(const tflite::GatherParams &op_params,
+                          const tflite::RuntimeShape &input_shape, const T *input_data,
+                          const tflite::RuntimeShape &coords_shape, const CoordsT *coords_data,
+                          const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  tflite::optimized_ops::Gather(op_params, input_shape, input_data, coords_shape, coords_data,
+                                output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_GATHER_H
diff --git a/compiler/luci-interpreter/pal/linux/PALMul.h b/compiler/luci-interpreter/pal/linux/PALMul.h
index cfaec1b58..a8a9d4abc 100644
--- a/compiler/luci-interpreter/pal/linux/PALMul.h
+++ b/compiler/luci-interpreter/pal/linux/PALMul.h
@@ -21,21 +21,31 @@
namespace luci_interpreter_pal
{
+template <typename T>
static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
- const float *input1_data, const tflite::RuntimeShape &input2_shape,
- const float *input2_data, const tflite::RuntimeShape &output_shape,
- float *output_data)
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
{
tflite::optimized_ops::Mul(params, input1_shape, input1_data, input2_shape, input2_data,
output_shape, output_data);
}
-static inline void BroadcastMul4DSlow(tflite::ArithmeticParams &params,
- const tflite::RuntimeShape &input1_shape,
- const float *input1_data,
- const tflite::RuntimeShape &input2_shape,
- const float *input2_data,
- const tflite::RuntimeShape &output_shape, float *output_data)
+// int64_t specialization of Mul: always goes through tflite::optimized_ops::BroadcastMul4DSlow
+// instead of tflite::optimized_ops::Mul.
+// NOTE(review): presumably the optimized Mul has no int64_t path - confirm.
+template <>
+inline void Mul<int64_t>(tflite::ArithmeticParams &params,
+                         const tflite::RuntimeShape &input1_shape, const int64_t *input1_data,
+                         const tflite::RuntimeShape &input2_shape, const int64_t *input2_data,
+                         const tflite::RuntimeShape &output_shape, int64_t *output_data)
+{
+  namespace ops = tflite::optimized_ops;
+  ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape, input2_data,
+                          output_shape, output_data);
+}
+
+template <typename T>
+static inline void
+BroadcastMul4DSlow(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data)
{
tflite::optimized_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data);
diff --git a/compiler/luci-interpreter/pal/linux/PALQuantize.h b/compiler/luci-interpreter/pal/linux/PALQuantize.h
new file mode 100644
index 000000000..bf1d7954e
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALQuantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
+#define LUCI_INTERPRETER_PAL_QUANTIZE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+// Quantizes float input into T output by delegating to tflite::optimized_ops::AffineQuantize.
+template <typename T>
+static inline void Quantize(tflite::QuantizationParams &params,
+                            const tflite::RuntimeShape &input_shape, const float *input_data,
+                            const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  namespace ops = tflite::optimized_ops;
+  ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+// Thin wrapper over tflite::optimized_ops::Requantize; every argument is forwarded as is.
+// Input and Output are the element types of the source and destination buffers.
+template <typename Input, typename Output>
+static inline void Requantize(const Input *input_data, int32_t size,
+                              int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+                              int32_t input_zero_point, int32_t output_zero_point,
+                              Output *output_data)
+{
+  namespace ops = tflite::optimized_ops;
+  ops::Requantize(input_data, size, effective_scale_multiplier, effective_scale_shift,
+                  input_zero_point, output_zero_point, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
diff --git a/compiler/luci-interpreter/pal/linux/PALSVDF.h b/compiler/luci-interpreter/pal/linux/PALSVDF.h
new file mode 100644
index 000000000..0ffba14f0
--- /dev/null
+++ b/compiler/luci-interpreter/pal/linux/PALSVDF.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <tensorflow/lite/kernels/internal/reference/svdf.h>
+
+namespace luci_interpreter_pal
+{
+// Integer SVDF: delegates to tflite::reference_ops::EvalIntegerSVDF.
+// The reference kernel is given the parameters by pointer, so the address of `params` is
+// passed; all other arguments are forwarded unchanged and in the same order.
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+            const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+            const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+            const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+            const int32_t *bias_data, int16_t *activation_state_data,
+            const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+            int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+            int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+  const TfLiteSVDFParams *svdf_params = &params;
+
+  tflite::reference_ops::EvalIntegerSVDF(
+    svdf_params, input_shape, input_data, weight_feature_shape, weight_feature_data,
+    weight_time_shape, weight_time_data, bias_shape, bias_data, activation_state_data,
+    output_shape, output_data, scratchpad_data, output_temp_data, scale_1_a, scale_1_b,
+    scale_2_a, scale_2_b, input_zp, output_zp);
+}
+// Float SVDF: delegates to tflite::reference_ops::EvalFloatSVDF.
+// The reference kernel is given the parameters by pointer, so the address of `params` is
+// passed; all other arguments are forwarded unchanged and in the same order.
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+          const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+          const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+          const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+          const float *bias_data, float *scratchpad_data, float *activation_state_data,
+          const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  const TfLiteSVDFParams *svdf_params = &params;
+
+  tflite::reference_ops::EvalFloatSVDF(svdf_params, input_shape, input_data,
+                                       weight_feature_shape, weight_feature_data,
+                                       weight_time_shape, weight_time_data, bias_shape, bias_data,
+                                       scratchpad_data, activation_state_data, output_shape,
+                                       output_data);
+}
+
+// Prepares the SVDF scratchpad tensors:
+//  - scratchpad_1 is always resized to {batch_size, num_filters};
+//  - scratchpad_2 is resized to {batch_size, num_units} only when the input type is S8;
+//  - scratchpad_3..scratchpad_6, input_shape and weight_time_shape are not used.
+// Throws std::runtime_error for the hybrid combination (FLOAT32 input with S8 or U8 feature
+// weights); in that case no tensor is resized.
+// NOTE(review): std::runtime_error needs <stdexcept>, which this header does not include itself.
+// NOTE(review): tensors that are not resized are left untouched (not marked non-allocatable) -
+// confirm that this is intended.
+static inline void SetupScratchpadTensor(
+  const luci_interpreter::DataType &input_data_type,
+  const luci_interpreter::DataType &weight_feature_data_type,
+  luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+  luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+  luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+  const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+  const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+  // Unused here; the casts suppress unused-parameter warnings on every path.
+  (void)input_shape;
+  (void)weight_time_shape;
+  (void)scratchpad_3;
+  (void)scratchpad_4;
+  (void)scratchpad_5;
+  (void)scratchpad_6;
+
+  // Spelled with luci_interpreter::DataType, the type the parameters are declared with.
+  const bool quantized_weights = weight_feature_data_type == luci_interpreter::DataType::S8 ||
+                                 weight_feature_data_type == luci_interpreter::DataType::U8;
+  if (input_data_type == luci_interpreter::DataType::FLOAT32 && quantized_weights)
+  {
+    throw std::runtime_error("Hybrid type is not currently supported for linux platform");
+  }
+
+  // Resize scratchpad_1 tensor
+  scratchpad_1->resize({batch_size, num_filters});
+
+  if (input_data_type == luci_interpreter::DataType::S8)
+  {
+    // Resize scratchpad_2 for full_integer op
+    scratchpad_2->resize({batch_size, num_units});
+  }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
diff --git a/compiler/luci-interpreter/pal/linux/pal.cmake b/compiler/luci-interpreter/pal/linux/pal.cmake
index 84349e0bf..185700cf9 100644
--- a/compiler/luci-interpreter/pal/linux/pal.cmake
+++ b/compiler/luci-interpreter/pal/linux/pal.cmake
@@ -40,7 +40,35 @@ macro(add_pal_to_target TGT)
# TODO put it back, I changed my mind.
# instead add sources with visitors in this library
- set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
+ set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
+
+ if(BUILD_ARM32_NEON)
+ # NOTE may need to revise this list for version upgrade
+ set(PAL_SOURCES ${PAL_SOURCES}
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/cpu_check.cc
+ ${TensorFlowRuySource_DIR}/ruy/allocator.cc
+ ${TensorFlowRuySource_DIR}/ruy/block_map.cc
+ ${TensorFlowRuySource_DIR}/ruy/blocking_counter.cc
+ ${TensorFlowRuySource_DIR}/ruy/context_get_ctx.cc
+ ${TensorFlowRuySource_DIR}/ruy/cpuinfo.cc
+ ${TensorFlowRuySource_DIR}/ruy/ctx.cc
+ ${TensorFlowRuySource_DIR}/ruy/denormal.cc
+ ${TensorFlowRuySource_DIR}/ruy/frontend.cc
+ ${TensorFlowRuySource_DIR}/ruy/pack_arm.cc
+ ${TensorFlowRuySource_DIR}/ruy/prepacked_cache.cc
+ ${TensorFlowRuySource_DIR}/ruy/prepare_packed_matrices.cc
+ ${TensorFlowRuySource_DIR}/ruy/system_aligned_alloc.cc
+ ${TensorFlowRuySource_DIR}/ruy/thread_pool.cc
+ ${TensorFlowRuySource_DIR}/ruy/trmul.cc
+ ${TensorFlowRuySource_DIR}/ruy/tune.cc
+ ${TensorFlowRuySource_DIR}/ruy/wait.cc
+ ${TensorFlowRuySource_DIR}/ruy/kernel_arm32.cc
+ )
+ endif(BUILD_ARM32_NEON)
+
add_library(luci_interpreter_linux_pal STATIC ${PAL_SOURCES})
set_target_properties(luci_interpreter_linux_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(luci_interpreter_linux_pal SYSTEM PRIVATE
diff --git a/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst b/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst
index 771974afe..d134a6b95 100644
--- a/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst
+++ b/compiler/luci-interpreter/pal/mcu/KernelsToBuild.lst
@@ -7,9 +7,11 @@ REGISTER_KERNEL(Concatenation)
REGISTER_KERNEL(Conv2D)
REGISTER_KERNEL(DepthToSpace)
REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Dequantize)
REGISTER_KERNEL(Div)
REGISTER_KERNEL(Elu)
REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(ExpandDims)
REGISTER_KERNEL(Floor)
REGISTER_KERNEL(FloorDiv)
REGISTER_KERNEL(Equal)
@@ -37,6 +39,7 @@ REGISTER_KERNEL(NotEqual)
REGISTER_KERNEL(Pad)
REGISTER_KERNEL(PadV2)
REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Quantize)
REGISTER_KERNEL(Reshape)
REGISTER_KERNEL(ResizeBilinear)
REGISTER_KERNEL(ResizeNearestNeighbor)
@@ -50,6 +53,7 @@ REGISTER_KERNEL(Square)
REGISTER_KERNEL(SquaredDifference)
REGISTER_KERNEL(Squeeze)
REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(SVDF)
REGISTER_KERNEL(Tanh)
REGISTER_KERNEL(Transpose)
REGISTER_KERNEL(TransposeConv)
diff --git a/compiler/luci-interpreter/pal/mcu/PALAveragePool2d.h b/compiler/luci-interpreter/pal/mcu/PALAveragePool2d.h
new file mode 100644
index 000000000..cce30601f
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALAveragePool2d.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+// Catch-all AveragePool for element types that have no dedicated specialization.
+// Not implemented: the assert fires in builds with assertions enabled, and no argument
+// is read or written.
+template <typename T>
+static inline void AveragePool(const tflite::PoolParams &params,
+                               const tflite::RuntimeShape &input_shape, const T *input_data,
+                               const tflite::RuntimeShape &output_shape, T *output_data,
+                               const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data)
+{
+  // MARK: At this moment this operation is not supported
+  assert(false && "AveragePool NYI");
+
+  // Mark every parameter as deliberately unused.
+  (void)scratchpad_data;
+  (void)scratchpad_shape;
+  (void)output_data;
+  (void)output_shape;
+  (void)input_data;
+  (void)input_shape;
+  (void)params;
+}
+
+// int8_t specialization: forwards to the TFLite reference integer pooling kernel.
+// The call takes no scratchpad, so scratchpad_shape and scratchpad_data are ignored.
+template <>
+inline void AveragePool<int8_t>(const tflite::PoolParams &params,
+                                const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+                                const tflite::RuntimeShape &output_shape, int8_t *output_data,
+                                const tflite::RuntimeShape &scratchpad_shape,
+                                int8_t *scratchpad_data)
+{
+  namespace integer_ops = tflite::reference_integer_ops;
+
+  integer_ops::AveragePool(params, input_shape, input_data, output_shape, output_data);
+
+  // Unused on this path.
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
+}
+
+// Scratchpad setup for average pooling: no scratchpad is requested here, so the tensor is
+// unconditionally marked as non-allocatable and every other argument is ignored.
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+                                         const luci_interpreter::DataType &input_data_type,
+                                         const tflite::RuntimeShape &input_shape,
+                                         const tflite::RuntimeShape &output_shape)
+{
+  scratchpad->set_allocatable(false);
+
+  // The remaining parameters are not used; the casts suppress unused-parameter warnings.
+  (void)output_shape;
+  (void)input_shape;
+  (void)input_data_type;
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALConv2d.h b/compiler/luci-interpreter/pal/mcu/PALConv2d.h
index 0a8ae4e48..13976877a 100644
--- a/compiler/luci-interpreter/pal/mcu/PALConv2d.h
+++ b/compiler/luci-interpreter/pal/mcu/PALConv2d.h
@@ -26,11 +26,11 @@ static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeS
const float *input_data, const tflite::RuntimeShape &filter_shape,
const float *filter_data, const tflite::RuntimeShape &bias_shape,
const float *bias_data, const tflite::RuntimeShape &output_shape,
- float *output_data, const tflite::RuntimeShape &im2col_shape,
- float *im2col_data)
+ float *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ float *scratchpad_data)
{
- (void)im2col_shape;
- (void)im2col_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
bias_shape, bias_data, output_shape, output_data,
tflite::RuntimeShape(), nullptr);
@@ -40,14 +40,14 @@ static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeS
const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
const int32 *bias_data, const tflite::RuntimeShape &output_shape,
- uint8 *output_data, const tflite::RuntimeShape &im2col_shape,
- uint8 *im2col_data)
+ uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ uint8 *scratchpad_data)
{
- (void)im2col_shape;
- (void)im2col_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
- bias_shape, bias_data, output_shape, output_data, im2col_shape,
- im2col_data, nullptr);
+ bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
+ scratchpad_data, nullptr);
}
static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
@@ -55,16 +55,31 @@ static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_
const int8 *input_data, const tflite::RuntimeShape &filter_shape,
const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
const int32 *bias_data, const tflite::RuntimeShape &output_shape,
- int8 *output_data, const tflite::RuntimeShape &im2col_shape,
- int8 *im2col_data)
+ int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ int8 *scratchpad_data)
{
- (void)im2col_shape;
- (void)im2col_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
filter_shape, filter_data, bias_shape, bias_data,
output_shape, output_data);
}
+// Scratchpad setup for Conv: no scratchpad is requested here, so the tensor is
+// unconditionally marked as non-allocatable and every other argument is ignored.
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+                                         const luci_interpreter::DataType &input_data_type,
+                                         const tflite::ConvParams &params,
+                                         const tflite::RuntimeShape &input_shape,
+                                         const tflite::RuntimeShape &filter_shape,
+                                         const tflite::RuntimeShape &output_shape)
+{
+  scratchpad->set_allocatable(false);
+
+  // The remaining parameters are not used; the casts suppress unused-parameter warnings.
+  (void)output_shape;
+  (void)filter_shape;
+  (void)input_shape;
+  (void)params;
+  (void)input_data_type;
+}
+
} // namespace luci_interpreter_pal
#endif // LUCI_INTERPRETER_PAL_CONV2D_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h b/compiler/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h
new file mode 100644
index 000000000..c9d1a2948
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+
+namespace luci_interpreter_pal
+{
+// Catch-all DepthwiseConvPerChannel for element types that have no dedicated specialization.
+// Not implemented yet: the assert fires in builds with assertions enabled (it expands to
+// nothing under NDEBUG), and none of the arguments is read or written.
+template <typename T>
+static inline void
+DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+                        const int32_t *output_shift, const tflite::RuntimeShape &input_shape,
+                        const T *input_data, const tflite::RuntimeShape &filter_shape,
+                        const T *filter_data, const tflite::RuntimeShape &bias_shape,
+                        const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
+                        T *output_data, const tflite::RuntimeShape &scratchpad_shape,
+                        T *scratchpad_data)
+{
+  // MARK: At this moment this operation is not supported
+  assert(false && "DepthwiseConvPerChannel NYI");
+
+  // Silence unused-parameter warnings: one cast per parameter, in declaration order.
+  (void)params;
+  (void)output_multiplier;
+  (void)output_shift;
+  (void)input_shape;
+  (void)input_data;
+  (void)filter_shape;
+  (void)filter_data;
+  (void)bias_shape;
+  (void)bias_data;
+  (void)output_shape;
+  (void)output_data;
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
+}
+
+// int8_t specialization: forwards to the TFLite reference integer kernel.
+// The call takes no scratchpad, so scratchpad_shape and scratchpad_data are ignored.
+template <>
+inline void DepthwiseConvPerChannel<int8_t>(
+  const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+  const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+  const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+  const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+  const tflite::RuntimeShape &output_shape, int8_t *output_data,
+  const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
+{
+  namespace integer_ops = tflite::reference_integer_ops;
+
+  integer_ops::DepthwiseConvPerChannel(params, output_multiplier, output_shift, input_shape,
+                                       input_data, filter_shape, filter_data, bias_shape,
+                                       bias_data, output_shape, output_data);
+
+  // Unused on this path.
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
+}
+
+// Scratchpad setup for depthwise convolution: no scratchpad is requested here, so the
+// tensor is unconditionally marked as non-allocatable and every other argument is ignored.
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+                                         const tflite::DepthwiseParams &params,
+                                         const luci_interpreter::DataType &input_data_type,
+                                         const tflite::RuntimeShape &input_shape,
+                                         const tflite::RuntimeShape &filter_shape,
+                                         const tflite::RuntimeShape &output_shape)
+{
+  scratchpad->set_allocatable(false);
+
+  // The remaining parameters are not used; the casts suppress unused-parameter warnings.
+  (void)output_shape;
+  (void)filter_shape;
+  (void)input_shape;
+  (void)input_data_type;
+  (void)params;
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALDequantize.h b/compiler/luci-interpreter/pal/mcu/PALDequantize.h
new file mode 100644
index 000000000..15ff0327b
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALDequantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter_pal
+{
+
+// Generic dequantization: calls tflite::reference_integer_ops::Dequantize, instantiated
+// explicitly for T, with all arguments passed through unchanged.
+template <typename T>
+static inline void Dequantize(tflite::DequantizationParams &params,
+                              const tflite::RuntimeShape &input_shape, const T *input_data,
+                              const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  namespace integer_ops = tflite::reference_integer_ops;
+  integer_ops::Dequantize<T>(params, input_shape, input_data, output_shape, output_data);
+}
+
+// Non-template overload for uint8_t input: forwards to tflite::reference_ops::Dequantize
+// rather than to the reference integer kernel.
+static inline void Dequantize(tflite::DequantizationParams &params,
+                              const tflite::RuntimeShape &input_shape, const uint8_t *input_data,
+                              const tflite::RuntimeShape &output_shape, float *output_data)
+{
+  namespace ops = tflite::reference_ops;
+  ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALFullyConnected.h b/compiler/luci-interpreter/pal/mcu/PALFullyConnected.h
new file mode 100644
index 000000000..048624d74
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALFullyConnected.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+
+#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
+
+namespace luci_interpreter_pal
+{
+// Generic fallback for element types that have no implementation here; the
+// explicit int8_t specialization below is the only one that computes anything.
+// The body only asserts. NOTE: assert() expands to nothing when NDEBUG is
+// defined, in which case this function returns without writing output_data.
+template <typename T>
+static inline void FullyConnected(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &filter_shape, const T *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ {
+ // MARK: At this moment this operation is not supported
+ assert(false && "FullyConnected NYI");
+ // The casts to void below only mark the parameters as used.
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+ }
+}
+
+// int8_t specialization: forwards every argument unchanged to
+// tflite::reference_integer_ops::FullyConnected.
+template <>
+inline void
+FullyConnected<int8_t>(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+ tflite::reference_integer_ops::FullyConnected(params, input_shape, input_data, filter_shape,
+ filter_data, bias_shape, bias_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALMul.h b/compiler/luci-interpreter/pal/mcu/PALMul.h
index 2b46b100c..347a97a83 100644
--- a/compiler/luci-interpreter/pal/mcu/PALMul.h
+++ b/compiler/luci-interpreter/pal/mcu/PALMul.h
@@ -21,21 +21,21 @@
namespace luci_interpreter_pal
{
+template <typename T>
static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
- const float *input1_data, const tflite::RuntimeShape &input2_shape,
- const float *input2_data, const tflite::RuntimeShape &output_shape,
- float *output_data)
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
{
tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data);
}
-static inline void BroadcastMul4DSlow(tflite::ArithmeticParams &params,
- const tflite::RuntimeShape &input1_shape,
- const float *input1_data,
- const tflite::RuntimeShape &input2_shape,
- const float *input2_data,
- const tflite::RuntimeShape &output_shape, float *output_data)
+template <typename T>
+static inline void
+BroadcastMul4DSlow(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data)
{
tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
input2_data, output_shape, output_data);
diff --git a/compiler/luci-interpreter/pal/mcu/PALQuantize.h b/compiler/luci-interpreter/pal/mcu/PALQuantize.h
new file mode 100644
index 000000000..6046789ae
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALQuantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
+#define LUCI_INTERPRETER_PAL_QUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter_pal
+{
+// Thin PAL wrapper: forwards every argument unchanged to
+// tflite::reference_ops::AffineQuantize. The input is a float buffer and T is
+// the element type of the output buffer.
+template <typename T>
+static inline void Quantize(tflite::QuantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+// Thin PAL wrapper: forwards every argument unchanged to
+// tflite::reference_ops::Requantize. Input and Output are the element types of
+// the source and destination buffers; `size` is passed through as given.
+template <typename Input, typename Output>
+static inline void Requantize(const Input *input_data, int32_t size,
+ int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+ int32_t input_zero_point, int32_t output_zero_point,
+ Output *output_data)
+{
+ tflite::reference_ops::Requantize(input_data, size, effective_scale_multiplier,
+ effective_scale_shift, input_zero_point, output_zero_point,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
diff --git a/compiler/luci-interpreter/pal/mcu/PALSVDF.h b/compiler/luci-interpreter/pal/mcu/PALSVDF.h
new file mode 100644
index 000000000..3bba668fb
--- /dev/null
+++ b/compiler/luci-interpreter/pal/mcu/PALSVDF.h
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <tensorflow/lite/kernels/internal/reference/svdf.h>
+
+namespace luci_interpreter_pal
+{
+// Fully integer SVDF: int8 input, output and feature weights, int16 time
+// weights and activation state, int32 bias and scratch buffers.
+//
+// Steps, in order:
+//  1. shift activation_state_data left by one element;
+//  2. for every batch and filter, write the dot product of the feature weights
+//     with (input - input_zp), rescaled with scale_1_a/scale_1_b and clamped to
+//     the int16 range, into the last slot of that filter's memory window;
+//  3. dot every memory window with weight_time_data into scratchpad_data;
+//  4. seed output_temp_data with the bias (or zero), add groups of n_rank
+//     consecutive scratch values, rescale with scale_2_a/scale_2_b, add
+//     output_zp, clamp to the int8 range and store into output_data.
+//
+// Buffer extents implied by the indexing below:
+//   activation_state_data           n_batch * n_filter * n_memory
+//   scratchpad_data                 n_batch * n_filter
+//   output_temp_data, output_data   n_batch * n_unit
+// bias_data may be null. bias_shape and output_shape are not read.
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, int16_t *activation_state_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+ int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+ int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+ const int n_rank = params.rank;
+ const int n_batch = input_shape.Dims(0);
+ const int n_input = input_shape.Dims(1);
+ const int n_filter = weight_feature_shape.Dims(0);
+ // Integer division: n_rank must be non-zero.
+ const int n_unit = n_filter / n_rank;
+ const int n_memory = weight_time_shape.Dims(1);
+
+ // Left shift the activation_state.
+ // The shift runs over the whole flat buffer, so a value also crosses from the
+ // start of one memory window into the last slot of the previous one; those
+ // last slots are overwritten by the feature matmul below.
+ // The loop compares pointers for equality, so the buffer must hold at least
+ // one element.
+ {
+ int16_t *new_state_start = activation_state_data;
+ const int16_t *old_state_start = activation_state_data + 1;
+ const int16_t *old_state_end = activation_state_data + n_batch * n_filter * n_memory;
+ while (old_state_start != old_state_end)
+ {
+ *new_state_start++ = *old_state_start++;
+ }
+ }
+
+ // Note: no need to clear the latest activation, matmul is not accumulative.
+
+ // Feature matmul.
+ {
+ const int32_t output_max = std::numeric_limits<int16_t>::max();
+ const int32_t output_min = std::numeric_limits<int16_t>::min();
+ // Points at the last slot of the first memory window and advances by
+ // n_memory per filter, continuing across batches.
+ int16_t *result_in_batch = activation_state_data + (n_memory - 1);
+ for (int b = 0; b < n_batch; b++)
+ {
+ // The feature weights are reused from the start for every batch.
+ const int8_t *matrix_ptr = weight_feature_data;
+ for (int r = 0; r < n_filter; r++)
+ {
+ int32_t dot_prod = 0;
+ const int8_t *vector_in_batch = input_data + b * n_input;
+ for (int c = 0; c < n_input; c++)
+ {
+ dot_prod += *matrix_ptr++ * (*vector_in_batch++ - input_zp);
+ }
+ dot_prod = tflite::MultiplyByQuantizedMultiplier(dot_prod, scale_1_a, scale_1_b);
+ dot_prod = std::min(std::max(output_min, dot_prod), output_max);
+ // This assumes state is symmetrically quantized. Otherwise last bit of
+ // state should be initialized to its zero point and accumulate the
+ // dot_prod.
+ // Equivalent as the following:
+ // result_in_batch = zero point, which happens to be zero.
+ // result_in_batch += dot_prod.
+ *result_in_batch = dot_prod;
+ result_in_batch += n_memory;
+ }
+ }
+ }
+
+ // Time.
+ {
+ for (int b = 0; b < n_batch; ++b)
+ {
+ int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter;
+
+ // Perform batched vector dot product:
+ // vector1 walks the time weights (restarted for every batch), vector2 walks
+ // this batch's activation state; both advance n_memory elements per filter.
+ const int16_t *vector1_ptr = weight_time_data;
+ const int16_t *vector2_ptr = activation_state_data + b * n_memory * n_filter;
+
+ for (int i = 0; i < n_filter; i++)
+ {
+ *scratch_ptr_batch = 0;
+ for (int j = 0; j < n_memory; j++)
+ {
+ *scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
+ }
+ scratch_ptr_batch++;
+ }
+ }
+ }
+
+ // Reduce, add bias, rescale, activation.
+ {
+ // Add bias.
+ if (bias_data)
+ {
+ // Vector batch assign:
+ // the same n_unit bias values are copied into every batch row.
+ for (int i = 0; i < n_batch; ++i)
+ {
+ int32_t *output_ptr = output_temp_data + i * n_unit;
+ const int32_t *bias_ptr = bias_data;
+ for (int j = 0; j < n_unit; ++j)
+ {
+ *output_ptr++ = *bias_ptr++;
+ }
+ }
+ }
+ else
+ {
+ // No bias: start the accumulation from zero.
+ int32_t *output_ptr = output_temp_data;
+ for (int i = 0; i < n_batch * n_unit; ++i)
+ {
+ *output_ptr++ = 0;
+ }
+ }
+
+ // Reduce.
+ for (int b = 0; b < n_batch; ++b)
+ {
+ int32_t *output_temp_ptr = output_temp_data + b * n_unit;
+ int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter;
+
+ // Reduction sum vector
+ // Each unit accumulates n_rank consecutive scratch values.
+ for (int i = 0; i < n_unit; ++i)
+ {
+ for (int j = 0; j < n_rank; ++j)
+ {
+ output_temp_ptr[i] += *scratch_ptr_batch++;
+ }
+ }
+ }
+
+ // Rescale.
+ const int32_t output_max = std::numeric_limits<int8_t>::max();
+ const int32_t output_min = std::numeric_limits<int8_t>::min();
+ for (int i = 0; i < n_batch * n_unit; ++i)
+ {
+ int32_t x1 = output_temp_data[i];
+ int32_t x2 = tflite::MultiplyByQuantizedMultiplier(x1, scale_2_a, scale_2_b);
+ int32_t x3 = x2 + output_zp;
+ int32_t x4 = std::min(std::max(output_min, x3), output_max);
+ output_data[i] = static_cast<int8_t>(x4);
+ }
+ }
+}
+// Float SVDF. Shifts activation_state_data left by one element, stores the dot
+// product of every feature filter with the current input in the last slot of
+// that filter's memory window, then hands the remaining work to
+// tflite::reference_ops::ApplyTimeWeightsBiasAndActivation.
+//
+// activation_state_data is indexed as batch_size * num_filters * memory_size
+// elements. bias_shape and output_shape are not read.
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, float *scratchpad_data, float *activation_state_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ const int32_t rank = params.rank;
+ const int32_t batch_size = input_shape.Dims(0);
+ const int32_t input_size = input_shape.Dims(1);
+ const int32_t num_filters = weight_feature_shape.Dims(0);
+ // Integer division: rank must be non-zero.
+ const int32_t num_units = num_filters / rank;
+ const int32_t memory_size = weight_time_shape.Dims(1);
+
+ // Left shift the activation_state.
+ // The shift covers the whole flat buffer; the last slot of each memory window
+ // is overwritten by the matrix-vector product below. The loop compares
+ // pointers for equality, so the buffer must hold at least one element.
+ {
+ float *new_state_start = activation_state_data;
+ const float *old_state_start = activation_state_data + 1;
+ const float *old_state_end = activation_state_data + batch_size * num_filters * memory_size;
+ while (old_state_start != old_state_end)
+ {
+ *new_state_start++ = *old_state_start++;
+ }
+ }
+
+ // Note: no need to clear the latest activation, matmul is not accumulative.
+
+ // Compute conv1d(inputs, weights_feature).
+ // The activation_state's rightmost column is used to save current cycle
+ // activation. This is achieved by starting at state_ptr[memory_size - 1] and
+ // having the stride equal to memory_size.
+
+ // Perform batched matrix vector multiply operation:
+ {
+ const float *matrix = weight_feature_data;
+ const float *vector = input_data;
+ float *result = &activation_state_data[memory_size - 1];
+ float *result_in_batch = result;
+ for (int i = 0; i < batch_size; ++i)
+ {
+ // The feature weights are reused from the start for every batch.
+ const float *matrix_ptr = matrix;
+ for (int j = 0; j < num_filters; ++j)
+ {
+ float dot_prod = 0.0f;
+ const float *vector_in_batch = vector + i * input_size;
+ for (int k = 0; k < input_size; ++k)
+ {
+ dot_prod += *matrix_ptr++ * *vector_in_batch++;
+ }
+ *result_in_batch = dot_prod;
+ result_in_batch += memory_size;
+ }
+ }
+ }
+
+ tflite::reference_ops::ApplyTimeWeightsBiasAndActivation(
+ batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data,
+ params.activation, activation_state_data, scratchpad_data, output_data);
+}
+
+// Resizes the scratch tensors used by SVDF in this (mcu) PAL.
+//  - FLOAT32 input combined with S8 or U8 feature weights (hybrid) is rejected
+//    with std::runtime_error before anything is resized.
+//  - scratchpad_1 is always resized to {batch_size, num_filters}.
+//  - scratchpad_2 is resized to {batch_size, num_units} only for S8 input.
+// scratchpad_3..scratchpad_6, input_shape and weight_time_shape are not used.
+static inline void SetupScratchpadTensor(
+ const luci_interpreter::DataType &input_data_type,
+ const luci_interpreter::DataType &weight_feature_data_type,
+ luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+ luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+ luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+ const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+ const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+
+ if (input_data_type == loco::DataType::FLOAT32 &&
+ (weight_feature_data_type == loco::DataType::S8 ||
+ weight_feature_data_type == loco::DataType::U8))
+ {
+ // The casts to void only mark the otherwise unused parameters as used.
+ (void)input_shape;
+ (void)weight_time_shape;
+ (void)scratchpad_3;
+ (void)scratchpad_4;
+ (void)scratchpad_5;
+ (void)scratchpad_6;
+
+ throw std::runtime_error("Hybrid type is not currently supported for mcu platform");
+ }
+
+ // Resize scratchpad_1 tensor
+ scratchpad_1->resize({batch_size, num_filters});
+
+ if (input_data_type == loco::DataType::S8)
+ {
+ // Resize scratchpad_2 for full_integer op
+ scratchpad_2->resize({batch_size, num_units});
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
diff --git a/compiler/luci-interpreter/pal/mcu/pal.cmake b/compiler/luci-interpreter/pal/mcu/pal.cmake
index a479d407b..907d51de6 100644
--- a/compiler/luci-interpreter/pal/mcu/pal.cmake
+++ b/compiler/luci-interpreter/pal/mcu/pal.cmake
@@ -39,7 +39,9 @@ macro(add_pal_to_target TGT)
# TODO put it back, I changed my mind.
# instead add sources with visitors in this library
- set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
+ set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc)
add_library(luci_interpreter_mcu_pal STATIC ${PAL_SOURCES})
set_target_properties(luci_interpreter_mcu_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(luci_interpreter_mcu_pal PRIVATE
diff --git a/compiler/luci-interpreter/src/CMakeLists.txt b/compiler/luci-interpreter/src/CMakeLists.txt
index e37150336..997b75a84 100644
--- a/compiler/luci-interpreter/src/CMakeLists.txt
+++ b/compiler/luci-interpreter/src/CMakeLists.txt
@@ -13,6 +13,7 @@ set(LUCI_INTERPRETER_BINARY "luci_interpreter${LUCI_INTERPRETER_SUFFIX}")
set(LUCI_INTERPRETER_CORE "luci_interpreter_core${LUCI_INTERPRETER_SUFFIX}")
set(LUCI_INTERPRETER_KERNELS "luci_interpreter_kernels${LUCI_INTERPRETER_SUFFIX}")
set(LUCI_INTERPRETER_LOADER "luci_interpreter_loader${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_IMPORT "luci_interpreter_import${LUCI_INTERPRETER_SUFFIX}")
add_subdirectory(core)
message(STATUS "LUCI INTERPRETER CORE")
@@ -20,6 +21,8 @@ add_subdirectory(kernels)
message(STATUS "LUCI INTERPRETER KERNELS")
add_subdirectory(loader)
message(STATUS "LUCI INTERPRETER LOADER")
+add_subdirectory(import)
+message(STATUS "LUCI INTERPRETER IMPORT")
message(STATUS "LUCI INTERPTER INITALIZED")
diff --git a/compiler/luci-interpreter/src/Interpreter.cpp b/compiler/luci-interpreter/src/Interpreter.cpp
index 1b8792a6c..8cf272efd 100644
--- a/compiler/luci-interpreter/src/Interpreter.cpp
+++ b/compiler/luci-interpreter/src/Interpreter.cpp
@@ -70,25 +70,30 @@ private:
} // namespace
+// Constructs an interpreter that owns its memory manager: a SimpleMemoryManager
+// is created, kept in _default_memory_manager, and passed to the ModuleLoader
+// that loads `module` into the runtime module.
+Interpreter::Interpreter(const luci::Module *module)
+{
+ _runtime_to_ir = std::make_unique<RuntimeToIR>();
+ // The notifier receives references to *_runtime_to_ir and _observers.
+ _event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers);
+ _runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get());
+
+ _default_memory_manager = std::make_unique<SimpleMemoryManager>();
+
+ ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor,
+ _default_memory_manager.get());
+ loader.load();
+}
+
Interpreter::Interpreter(const luci::Module *module,
luci_interpreter::IMemoryManager *memory_manager)
{
+ assert(memory_manager && "Use Interpreter::Interpreter(module) constructor instead");
+
_runtime_to_ir = std::make_unique<RuntimeToIR>();
_event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers);
_runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get());
- if (memory_manager == nullptr)
- {
- _default_memory_manager = std::make_unique<SimpleMemoryManager>();
- _memory_manager = _default_memory_manager.get();
- }
- else
- {
- _memory_manager = memory_manager;
- }
-
ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor,
- _memory_manager);
+ memory_manager);
loader.load();
}
diff --git a/compiler/luci-interpreter/src/core/CMakeLists.txt b/compiler/luci-interpreter/src/core/CMakeLists.txt
index 4430cba11..c2471e01c 100644
--- a/compiler/luci-interpreter/src/core/CMakeLists.txt
+++ b/compiler/luci-interpreter/src/core/CMakeLists.txt
@@ -10,7 +10,9 @@ set(SOURCES
Tensor.cpp)
add_library(${LUCI_INTERPRETER_CORE} STATIC ${SOURCES})
-set_target_properties(${LUCI_INTERPRETER_CORE} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_CORE} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
target_link_libraries(${LUCI_INTERPRETER_CORE} PUBLIC luci_lang)
diff --git a/compiler/luci-interpreter/src/core/KernelParams.h b/compiler/luci-interpreter/src/core/KernelParams.h
index ee0390fcc..958fd4b74 100644
--- a/compiler/luci-interpreter/src/core/KernelParams.h
+++ b/compiler/luci-interpreter/src/core/KernelParams.h
@@ -43,6 +43,12 @@ struct ArgMaxParams
DataType output_type;
};
+// Parameters for the BatchMatMul kernel. Neither member has a default value.
+// NOTE(review): adj_x / adj_y presumably request the adjoint (transposed) form
+// of the first / second operand, as in TFLite's BatchMatMul options - confirm.
+struct BatchMatMulParams
+{
+ bool adj_x;
+ bool adj_y;
+};
+
struct ConcatenationParams
{
int axis;
@@ -83,6 +89,13 @@ struct DivParams
struct FullyConnectedParams
{
Activation activation;
+ bool keep_num_dims = false;
+};
+
+struct GatherParams
+{
+ int32_t axis;
+ int32_t batch_dims;
};
struct InstanceNormParams
@@ -119,6 +132,11 @@ struct MulParams
Activation activation;
};
+// Parameters for the OneHot kernel. The member has no default value.
+// NOTE(review): axis is presumably the output dimension that receives the
+// one-hot vectors - confirm against the kernel.
+struct OneHotParams
+{
+ int32_t axis;
+
struct PackParams
{
int32_t values_count;
@@ -157,6 +175,13 @@ struct SubParams
Activation activation;
};
+// Parameters for the SVDF kernel. No member has a default value.
+// NOTE(review): field meanings presumably mirror the TFLite SVDF options of the
+// same names - confirm against the kernel that reads them.
+struct SVDFParams
+{
+ bool asymmetric_quantize_inputs;
+ int32_t svdf_rank;
+ Activation activation;
+};
+
struct SpaceToDepthParams
{
int block_size;
diff --git a/compiler/luci-interpreter/src/import/CMakeLists.txt b/compiler/luci-interpreter/src/import/CMakeLists.txt
new file mode 100644
index 000000000..dd9733f92
--- /dev/null
+++ b/compiler/luci-interpreter/src/import/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Static library ${LUCI_INTERPRETER_IMPORT}: the graph-builder registry plus the
+# node builders found under Nodes/.
+set(SOURCES
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/GraphBuilderRegistry.h"
+ GraphBuilderRegistry.cpp)
+
+# include specific builders
+# (the glob matches every file under Nodes/, so headers are listed as sources too)
+file(GLOB_RECURSE NODES "Nodes/*")
+list(APPEND SOURCES ${NODES})
+
+add_library(${LUCI_INTERPRETER_IMPORT} STATIC ${SOURCES})
+# Position-independent code is enabled unless NNCC_LIBRARY_NO_PIC is set.
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_IMPORT} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+
+# PUBLIC: the include directory and the luci_import dependency propagate to
+# targets that link against this library.
+target_include_directories(${LUCI_INTERPRETER_IMPORT} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_link_libraries(${LUCI_INTERPRETER_IMPORT} PUBLIC luci_import)
diff --git a/compiler/luci-interpreter/src/import/GraphBuilderRegistry.cpp b/compiler/luci-interpreter/src/import/GraphBuilderRegistry.cpp
new file mode 100644
index 000000000..a33bca6a4
--- /dev/null
+++ b/compiler/luci-interpreter/src/import/GraphBuilderRegistry.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci_interpreter/GraphBuilderRegistry.h"
+#include "Nodes/CircleReferencingConst.h"
+
+namespace luci_interpreter
+{
+
+/// Creates a luci::GraphBuilderRegistry in which the node builder for BUFFER
+/// type is redefined to CircleReferencingConstNodeBuilder, and returns it as a
+/// luci::GraphBuilderSource.
+std::unique_ptr<luci::GraphBuilderSource> source_without_constant_copying()
+{
+  auto registry = std::make_unique<luci::GraphBuilderRegistry>();
+
+  // Redefine the NodeBuilder of BUFFER type.
+  registry->add(std::make_unique<CircleReferencingConstNodeBuilder>());
+
+  return registry;
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp b/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp
new file mode 100644
index 000000000..14e90f240
--- /dev/null
+++ b/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleReferencingConst.h"
+
+#include <vector>
+
+namespace
+{
+
+// helper struct which describes data loaded to custom_options of CircleReferencingConst node
+// build() below stores one instance of this struct directly in the bytes of the
+// node's custom_options vector (the vector is sized with sizeof(ConstDataReference)).
+struct ConstDataReference
+{
+ const uint8_t *data = nullptr; // set from buffer.data(); the data is not copied
+ uint32_t size = 0; // set from buffer.size()
+};
+
+} // namespace
+
+namespace luci_interpreter
+{
+using namespace luci;
+
+// Builds a node for a constant tensor that references the tensor's buffer
+// instead of copying it.
+//
+// Returns nullptr, creating no node, when:
+//  - the tensor is a variable,
+//  - both its shape and its buffer are empty,
+//  - it is the output of another operator, or
+//  - its buffer is empty although the element count from its shape is non-zero.
+// Otherwise creates a CircleCustom node (0 inputs, 1 output) with custom code
+// "CircleReferencingConst" whose custom_options hold a ConstDataReference
+// {buffer.data(), buffer.size()}, and returns a CircleCustomOut with index 0
+// attached to it.
+//
+// NOTE(review): only the pointer is stored, so the memory behind the reader's
+// buffers has to outlive every use of the node; nothing here enforces that.
+CircleNode *CircleReferencingConstNodeBuilder::build(TensorIndex tensor_index,
+ GraphBuilderContext *context) const
+{
+ assert(tensor_index >= 0);
+
+ const auto graph = context->graph();
+ const auto reader = context->reader();
+ const auto tensors = reader->tensors();
+ auto const const_tensor = tensors[tensor_index];
+ assert(const_tensor != nullptr);
+ if (const_tensor->is_variable())
+ {
+ // Create CircleVariable for variable
+ return nullptr;
+ }
+
+ auto const buffer = wrap(reader->buffers()[const_tensor->buffer()]->data());
+ auto const const_dims = wrap(const_tensor->shape()); // in NHWC
+ if (const_dims.empty() && buffer.empty())
+ {
+ // unknown shape tensor and scalar tensor
+ return nullptr;
+ }
+
+ // if tensor_index is used as output to some other operator, this is not a constant
+ auto tensoroutputs = context->tensoroutputs();
+ if (tensoroutputs->find(tensor_index))
+ {
+ // other operator output tensor
+ return nullptr;
+ }
+
+ // Element count is the product of all dimensions, accumulated in uint32_t.
+ uint32_t num_elements = 1;
+ for (uint32_t r = 0; r < const_dims.size(); ++r)
+ {
+ num_elements = num_elements * const_dims[r];
+ }
+
+ if (buffer.empty() && num_elements > 0)
+ {
+ // normal empty tensor
+ return nullptr;
+ }
+
+ // create CircleReferencingConst
+ auto custom_node = graph->nodes()->create<CircleCustom>(0, 1);
+ {
+ custom_node->custom_code("CircleReferencingConst");
+
+ copy_tensor_attributes(const_tensor, custom_node);
+ custom_node->shape_status(luci::ShapeStatus::VALID);
+
+ // custom options stores size of buffer and pointer's value to buffer's data
+ {
+ std::vector<uint8_t> custom_options(sizeof(ConstDataReference));
+ {
+ // The vector's bytes are written through a ConstDataReference reference.
+ auto &const_data_ref = *reinterpret_cast<ConstDataReference *>(custom_options.data());
+ const_data_ref = {buffer.data(), buffer.size()};
+ }
+ custom_node->custom_options(custom_options);
+ }
+ }
+
+ // Output of CircleCustom node presented with CircleConstNode
+ auto out_node = graph->nodes()->create<CircleCustomOut>();
+ {
+ out_node->index(0);
+ out_node->input(custom_node);
+
+ copy_tensor_attributes(const_tensor, out_node);
+ out_node->shape_status(luci::ShapeStatus::VALID);
+ }
+
+ return out_node;
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.h b/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.h
new file mode 100644
index 000000000..ed8f95124
--- /dev/null
+++ b/compiler/luci-interpreter/src/import/Nodes/CircleReferencingConst.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
+#define __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
+
+#include <luci/Import/NodeBuilder.h>
+
+#include <luci/IR/Nodes/CircleConst.h>
+
+namespace luci_interpreter
+{
+using namespace luci;
+
+/**
+ * @brief Builder creates CircleCustom node with pointer to constants data from Tensor with buffer.
+ *
+ * Derives from TypedNodeBuilder<NodeBuilderType::BUFFER>, i.e. it is the builder
+ * for nodes of BUFFER type.
+ */
+class CircleReferencingConstNodeBuilder : public TypedNodeBuilder<NodeBuilderType::BUFFER>
+{
+public:
+ /**
+ * @brief Build the node for the tensor at tensor_index.
+ * @return the created output node, or nullptr when no node is created for the tensor
+ */
+ CircleNode *build(TensorIndex tensor_index, GraphBuilderContext *ctx) const final;
+};
+
+} // namespace luci_interpreter
+
+#endif // __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
diff --git a/compiler/luci-interpreter/src/kernels/Add.cpp b/compiler/luci-interpreter/src/kernels/Add.cpp
index 7381c3849..d7bf3084f 100644
--- a/compiler/luci-interpreter/src/kernels/Add.cpp
+++ b/compiler/luci-interpreter/src/kernels/Add.cpp
@@ -38,8 +38,11 @@ Add::Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddPa
void Add::configure()
{
LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
if (input1()->element_type() == DataType::S16)
{
+ LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 &&
+ input2()->zero_points().size() == 1);
LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
output()->zero_point() == 0);
}
@@ -54,6 +57,12 @@ void Add::execute() const
case DataType::FLOAT32:
evalFloat();
break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
case DataType::U8:
evalQuantized();
break;
@@ -67,13 +76,8 @@ void Add::execute() const
void Add::evalFloat() const
{
- float activation_min{};
- float activation_max{};
- calculateActivationRange(_params.activation, &activation_min, &activation_max);
-
tflite::ArithmeticParams params{};
- params.float_activation_min = activation_min;
- params.float_activation_max = activation_max;
+ fillArithmeticActivationRange<float>(params, _params.activation);
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
getTensorShape(input1()), getTensorShape(input2()), &params);
@@ -92,6 +96,28 @@ void Add::evalFloat() const
}
}
+// Element-wise addition for integer element type T. The activation range is
+// filled in from _params.activation; the broadcasting kernel is used when
+// ProcessBroadcastShapes reports that broadcasting is required, and the plain
+// kernel otherwise.
+template <typename T> void Add::evalInteger() const
+{
+  tflite::ArithmeticParams op_params{};
+  fillArithmeticActivationRange<T>(op_params, _params.activation);
+
+  if (tflite::reference_ops::ProcessBroadcastShapes(getTensorShape(input1()),
+                                                    getTensorShape(input2()), &op_params))
+  {
+    // Shapes differ: take the slow 4D broadcasting path and finish.
+    tflite::reference_ops::BroadcastAdd4DSlow(
+      op_params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+      getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+    return;
+  }
+
+  // No broadcasting required.
+  tflite::reference_ops::Add(op_params, getTensorShape(input1()), getTensorData<T>(input1()),
+                             getTensorShape(input2()), getTensorData<T>(input2()),
+                             getTensorShape(output()), getTensorData<T>(output()));
+}
+
void Add::evalQuantized() const
{
const auto input1_scale = static_cast<double>(input1()->scale());
diff --git a/compiler/luci-interpreter/src/kernels/Add.h b/compiler/luci-interpreter/src/kernels/Add.h
index 79518845d..91d95b6af 100644
--- a/compiler/luci-interpreter/src/kernels/Add.h
+++ b/compiler/luci-interpreter/src/kernels/Add.h
@@ -39,6 +39,7 @@ public:
private:
void evalFloat() const;
+ template <typename T> void evalInteger() const;
void evalQuantized() const;
void evalQuantizedS16() const;
};
diff --git a/compiler/luci-interpreter/src/kernels/Add.test.cpp b/compiler/luci-interpreter/src/kernels/Add.test.cpp
index 847b65667..b8b1c3089 100644
--- a/compiler/luci-interpreter/src/kernels/Add.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Add.test.cpp
@@ -166,6 +166,69 @@ TEST_F(AddTest, Float)
}
}
+// Runs the Add kernel with RELU activation on integer tensors of type DType.
+// Each of the four broadcast shapes is checked twice against the same expected
+// output: first as (base, broadcast) operands, then with the operands exchanged.
+// Tensor memory is obtained from memory_manager.
+template <loco::DataType DType> void CheckInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  Shape base_shape = {2, 3, 1, 2};
+  std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+  std::vector<std::vector<dtype>> test_outputs = {
+    {3, 3, 0, 1, 0, 8, 5, 1, 0, 0, 2, 6, 8, 0, 1, 0, 5, 1,
+     5, 4, 0, 2, 2, 9, 11, 0, 4, 0, 8, 5, 11, 2, 4, 0, 8, 7},
+    {3, 3, 0, 0, 5, 1, 5, 4, 4, 0, 8, 7},
+    {3, 6, 0, 3, 0, 0, 5, 4, 2, 1, 0, 0, 8, 0, 5, 0, 1, 0,
+     0, 2, 2, 4, 7, 9, 6, 0, 8, 0, 13, 5, 6, 0, 8, 2, 13, 7},
+    {3, 6, 2, 1, 1, 0, 0, 2, 8, 0, 13, 7}};
+  std::vector<dtype> input1_data{-1, 2, 1, 0, 4, -5, 1, 3, 7, -1, 7, 1};
+  std::vector<dtype> input2_data{4, 1, -3, -1, 1, 6};
+
+  // Builds both operands, runs the kernel once and compares the result with
+  // test_outputs[i]. Shared by the forward and the exchanged-operand passes.
+  auto run_case = [&](const Shape &lhs_shape, const std::vector<dtype> &lhs_data,
+                      const Shape &rhs_shape, const std::vector<dtype> &rhs_data, size_t i) {
+    Tensor input1_tensor = makeInputTensor<DType>(lhs_shape, lhs_data, memory_manager);
+    Tensor input2_tensor = makeInputTensor<DType>(rhs_shape, rhs_data, memory_manager);
+    Tensor output_tensor = makeOutputTensor(DType);
+
+    AddParams params{};
+    params.activation = Activation::RELU;
+
+    Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+    kernel.configure();
+    memory_manager->allocate_memory(output_tensor);
+    kernel.execute();
+
+    EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+      << "With shape number " << i;
+  };
+
+  for (size_t i = 0; i < test_shapes.size(); ++i)
+  {
+    run_case(base_shape, input1_data, test_shapes[i], input2_data, i);
+  }
+  // Re-run with exchanged inputs.
+  for (size_t i = 0; i < test_shapes.size(); ++i)
+  {
+    run_case(test_shapes[i], input2_data, base_shape, input1_data, i);
+  }
+}
+
+// Add on S32 tensors. All expectations are made inside CheckInteger.
+TEST_F(AddTest, SInt32)
+{
+ CheckInteger<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+// Add on S64 tensors. All expectations are made inside CheckInteger.
+TEST_F(AddTest, SInt64)
+{
+ CheckInteger<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
TEST_F(AddTest, SInt16)
{
Shape base_shape = {2, 3, 1, 2};
@@ -248,11 +311,24 @@ TEST_F(AddTest, Input_Output_Type_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
-TEST_F(AddTest, Invalid_Input_Type_NEG)
+TEST_F(AddTest, Invalid_Output_Type_NEG)
{
Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
- Tensor output_tensor = makeOutputTensor(DataType::S64);
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(AddTest, Invalid_Input_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U64);
AddParams params{};
params.activation = Activation::RELU;
@@ -263,6 +339,19 @@ TEST_F(AddTest, Invalid_Input_Type_NEG)
EXPECT_ANY_THROW(kernel.execute());
}
+TEST_F(AddTest, Invalid_Quantization_NEG) // Negative test: Add::configure() must throw for these S16 tensors.
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S16>({1}, {1}, _memory_manager.get()); // built without scale/zero-point arguments
+ Tensor input2_tensor = makeInputTensor<DataType::S16>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16);
+
+ AddParams params{};
+ params.activation = Activation::NONE;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure()); // NOTE(review): presumably rejected for missing quantization parameters - confirm in Add::configure
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp b/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp
index 119c69ccf..474f4b321 100644
--- a/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp
@@ -57,7 +57,7 @@ template <typename T> class ArgMaxTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(ArgMaxTest, DataTypes);
+TYPED_TEST_SUITE(ArgMaxTest, DataTypes);
TYPED_TEST(ArgMaxTest, Simple)
{
diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp b/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp
index 5545fb4d4..d3bade9e4 100644
--- a/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp
+++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp
@@ -18,8 +18,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
-#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+#include "PALAveragePool2d.h"
#include <stdexcept>
@@ -29,8 +28,9 @@ namespace luci_interpreter
namespace kernels
{
-AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
- : KernelWithParams<Pool2DParams>({input}, {output}, params)
+AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, Tensor *scratchpad,
+ const Pool2DParams &params)
+ : KernelWithParams<Pool2DParams>({input}, {output, scratchpad}, params)
{
}
@@ -76,6 +76,10 @@ void AveragePool2D::configure()
LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point());
}
output()->resize({batches, output_height, output_width, depth});
+
+ auto scratchpad = getOutputTensors()[1];
+ luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(),
+ getTensorShape(input()), getTensorShape(output()));
}
void AveragePool2D::execute() const
@@ -155,9 +159,14 @@ void AveragePool2D::evalSInt8() const
params.quantized_activation_min = activation_min;
params.quantized_activation_max = activation_max;
- tflite::reference_integer_ops::AveragePool(
+ auto scratchpad = getOutputTensors()[1];
+ int8_t *scratchpad_data = nullptr;
+ if (scratchpad->is_allocatable())
+ scratchpad_data = scratchpad->data<int8_t>();
+
+ luci_interpreter_pal::AveragePool<int8_t>(
params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(output()),
- getTensorData<int8_t>(output()));
+ getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
}
void AveragePool2D::evalSInt16() const
diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.h b/compiler/luci-interpreter/src/kernels/AveragePool2D.h
index b98367f31..2c8fe16e7 100644
--- a/compiler/luci-interpreter/src/kernels/AveragePool2D.h
+++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.h
@@ -28,7 +28,8 @@ namespace kernels
class AveragePool2D : public KernelWithParams<Pool2DParams>
{
public:
- AveragePool2D(const Tensor *input, Tensor *output, const Pool2DParams &params);
+ AveragePool2D(const Tensor *input, Tensor *output, Tensor *scratchpad,
+ const Pool2DParams &params);
const Tensor *input() const { return _inputs[0]; }
Tensor *output() const { return _outputs[0]; }
diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp b/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp
index 7ed421129..478bfa68e 100644
--- a/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp
@@ -46,6 +46,7 @@ TEST_F(AveragePool2DTest, Float)
Tensor input_tensor =
makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
Pool2DParams params{};
params.padding = Padding::VALID;
@@ -55,8 +56,9 @@ TEST_F(AveragePool2DTest, Float)
params.stride_width = 2;
params.activation = Activation::RELU6;
- AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
_memory_manager->allocate_memory(output_tensor);
kernel.execute();
@@ -78,6 +80,7 @@ TEST_F(AveragePool2DTest, Uint8_0)
Tensor input_tensor = makeInputTensor<DataType::U8>(
{1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
Pool2DParams params{};
params.padding = Padding::VALID;
@@ -87,8 +90,9 @@ TEST_F(AveragePool2DTest, Uint8_0)
params.stride_width = 2;
params.activation = Activation::RELU6;
- AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
_memory_manager->allocate_memory(output_tensor);
kernel.execute();
@@ -107,6 +111,7 @@ TEST_F(AveragePool2DTest, Uint8_1)
Tensor input_tensor = makeInputTensor<DataType::U8>(
{1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
Pool2DParams params{};
params.padding = Padding::VALID;
@@ -116,9 +121,10 @@ TEST_F(AveragePool2DTest, Uint8_1)
params.stride_width = 2;
params.activation = Activation::RELU6;
- AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
kernel.configure();
_memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
kernel.execute();
EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({2.75, 6.0}));
@@ -141,6 +147,7 @@ TEST_F(AveragePool2DTest, SInt16)
Tensor input_tensor =
makeInputTensor<DataType::S16>(input_shape, 0.5, 0, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+ Tensor scratchpad(DataType::S16, Shape({}), {}, "");
Pool2DParams params{};
params.padding = Padding::VALID;
@@ -150,8 +157,9 @@ TEST_F(AveragePool2DTest, SInt16)
params.stride_width = 2;
params.activation = Activation::RELU6;
- AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
_memory_manager->allocate_memory(output_tensor);
kernel.execute();
@@ -174,6 +182,7 @@ TEST_F(AveragePool2DTest, SInt8)
Tensor input_tensor = makeInputTensor<DataType::S8>(
input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S8, quant_param.first, quant_param.second);
+ Tensor scratchpad(DataType::S8, Shape({}), {}, "");
Pool2DParams params{};
params.padding = Padding::VALID;
@@ -183,8 +192,9 @@ TEST_F(AveragePool2DTest, SInt8)
params.stride_width = 2;
params.activation = Activation::RELU6;
- AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
_memory_manager->allocate_memory(output_tensor);
kernel.execute();
@@ -203,6 +213,7 @@ TEST_F(AveragePool2DTest, Invalid_Input_Shape_NEG)
Tensor input_tensor =
makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
Pool2DParams params{};
params.padding = Padding::VALID;
@@ -212,7 +223,7 @@ TEST_F(AveragePool2DTest, Invalid_Input_Shape_NEG)
params.stride_width = 2;
params.activation = Activation::RELU6;
- AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
EXPECT_ANY_THROW(kernel.configure());
}
@@ -227,6 +238,7 @@ TEST_F(AveragePool2DTest, In_Out_Type_NEG)
Tensor input_tensor =
makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
Pool2DParams params{};
params.padding = Padding::VALID;
@@ -236,7 +248,7 @@ TEST_F(AveragePool2DTest, In_Out_Type_NEG)
params.stride_width = 2;
params.activation = Activation::RELU6;
- AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
EXPECT_ANY_THROW(kernel.configure());
}
@@ -252,6 +264,7 @@ TEST_F(AveragePool2DTest, Quant_Param_NEG)
Tensor input_tensor = makeInputTensor<DataType::U8>(
{1, 2, 4, 1}, quant_param1.first, quant_param1.second, input_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param2.first, quant_param2.second);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
Pool2DParams params{};
params.padding = Padding::VALID;
@@ -261,7 +274,7 @@ TEST_F(AveragePool2DTest, Quant_Param_NEG)
params.stride_width = 2;
params.activation = Activation::RELU6;
- AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
EXPECT_ANY_THROW(kernel.configure());
}
diff --git a/compiler/luci-interpreter/src/kernels/BatchMatMul.cpp b/compiler/luci-interpreter/src/kernels/BatchMatMul.cpp
new file mode 100644
index 000000000..24ca22996
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/BatchMatMul.cpp
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchMatMul.h"
+#include "kernels/Utils.h"
+
+#include "PALBatchMatMul.h"
+
+#include <tensorflow/lite/kernels/internal/reference/transpose.h>
+
+#include <stdexcept>
+
+namespace
+{
+
+tflite::RuntimeShape SwapRowColumnDims(const tflite::RuntimeShape &shape) // Returns a copy of `shape` with its last two dimensions exchanged.
+{
+ tflite::RuntimeShape swapped_shape(shape); // start from a copy so every leading dimension is kept as-is
+ const int32_t dims = shape.DimensionsCount();
+ swapped_shape.SetDim(dims - 2, shape.Dims(dims - 1)); // indexes dims-2 and dims-1: assumes rank >= 2, not checked here
+ swapped_shape.SetDim(dims - 1, shape.Dims(dims - 2));
+ return swapped_shape;
+}
+
+} // namespace
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+BatchMatMul::BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp, // Builds the kernel from two inputs, one output and two temporaries.
+ Tensor *y_tmp, const BatchMatMulParams &params)
+ : KernelWithParams({x, y}, {output, x_tmp, y_tmp}, params) // temporaries are registered as extra outputs, after `output`
+{
+}
+
+void BatchMatMul::configure() // Validates operand types, ranks and shapes, sets up the temporaries and resizes the output.
+{
+ auto lhs = x();
+ auto rhs = y();
+ auto adj_x = params().adj_x;
+ auto adj_y = params().adj_y;
+
+ // TODO Support non-float types
+ if (lhs->element_type() != DataType::FLOAT32 || rhs->element_type() != DataType::FLOAT32)
+ throw std::runtime_error("Unsupported type."); // both operands must be FLOAT32
+
+ LUCI_INTERPRETER_CHECK(lhs->element_type() == rhs->element_type()); // always holds once the FLOAT32 check above has passed
+
+ auto lhs_rank = lhs->shape().num_dims();
+ auto rhs_rank = rhs->shape().num_dims();
+ LUCI_INTERPRETER_CHECK(lhs_rank >= 2 && lhs_rank <= 4); // only ranks 2 to 4 are accepted
+ LUCI_INTERPRETER_CHECK(rhs_rank >= 2 && rhs_rank <= 4);
+
+ auto lhs_scratchpad = temp_lhs();
+ auto rhs_scratchpad = temp_rhs();
+ luci_interpreter_pal::SetupScratchpadTensor(lhs_scratchpad, rhs_scratchpad, getTensorShape(lhs), // PAL hook; presumably shapes the temporaries - see PALBatchMatMul.h
+ getTensorShape(rhs));
+
+ auto output_rank = std::max(lhs_rank, rhs_rank);
+
+ auto extended_lhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(lhs)); // both shapes are brought to output_rank so they can be compared index by index
+ auto extended_rhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(rhs));
+
+ // Ensure any batch dimensions obey broadcasting rules.
+ for (int i = 0; i < output_rank - 2; ++i)
+ {
+ const int lhs_dim = extended_lhs_shape.Dims(i);
+ const int rhs_dim = extended_rhs_shape.Dims(i);
+ if (lhs_dim != rhs_dim)
+ {
+ if (lhs_dim != 1)
+ {
+ LUCI_INTERPRETER_CHECK(rhs_dim == 1); // differing batch dims are allowed only when one of them is 1
+ }
+ }
+ }
+
+ // Ensure other dimensions work for matrix multiplication.
+ int accum_dim_lhs =
+ adj_x ? extended_lhs_shape.Dims(output_rank - 2) : extended_lhs_shape.Dims(output_rank - 1); // adj_x selects the second-to-last axis as the contraction axis
+ int accum_dim_rhs =
+ adj_y ? extended_rhs_shape.Dims(output_rank - 1) : extended_rhs_shape.Dims(output_rank - 2); // adj_y selects the last axis as the contraction axis
+ LUCI_INTERPRETER_CHECK(accum_dim_lhs == accum_dim_rhs);
+
+ Shape output_shape(output_rank);
+ // Fill in any broadcast dimensions.
+ for (int i = 0; i < output_rank - 2; ++i)
+ {
+ const int lhs_dim = extended_lhs_shape.Dims(i);
+ const int rhs_dim = extended_rhs_shape.Dims(i);
+ int broadcast_dim = lhs_dim;
+ if ((lhs_dim != rhs_dim) && (lhs_dim == 1))
+ {
+ broadcast_dim = rhs_dim; // lhs is the broadcast side, so the rhs extent is used
+ }
+ output_shape.dim(i) = broadcast_dim;
+ }
+ // Fill in the matmul dimensions.
+ int lhs_rows_index = adj_x ? output_rank - 1 : output_rank - 2;
+ int rhs_cols_index = adj_y ? output_rank - 2 : output_rank - 1;
+
+ output_shape.dim(output_rank - 2) = extended_lhs_shape.Dims(lhs_rows_index);
+ output_shape.dim(output_rank - 1) = extended_rhs_shape.Dims(rhs_cols_index);
+
+ output()->resize(output_shape);
+}
+
+void TransposeRowsColumns(const Tensor *tensor_in, Tensor *tensor_out) // Writes tensor_in with its last two axes exchanged into tensor_out; throws std::runtime_error for non-FLOAT32 input.
+{
+ tflite::RuntimeShape transposed_shape(getTensorShape(tensor_in)); // starts as a copy; its last two dims are swapped below
+ tflite::RuntimeShape shape(getTensorShape(tensor_in));
+ tflite::TransposeParams params;
+ int rank = shape.DimensionsCount(); // perm[rank - 2] below assumes rank >= 2; not checked in this function
+ params.perm_count = rank;
+ for (int i = 0; i < rank - 2; ++i)
+ {
+ params.perm[i] = i; // leading axes keep their position
+ }
+ // Transpose the last two dimensions.
+ params.perm[rank - 2] = rank - 1;
+ params.perm[rank - 1] = rank - 2;
+ transposed_shape.SetDim(rank - 1, shape.Dims(rank - 2));
+ transposed_shape.SetDim(rank - 2, shape.Dims(rank - 1));
+ switch (tensor_in->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::reference_ops::Transpose(params, shape, getTensorData<float>(tensor_in), // NOTE(review): tensor_out is presumably already allocated by the caller - confirm
+ transposed_shape, getTensorData<float>(tensor_out));
+ break;
+ default:
+ throw std::runtime_error("Only support fp32 BatchMatMul for now."); // message spelling corrected (was "suppport")
+ }
+}
+
+void BatchMatMul::execute() const // Transposes operands into the temporaries as needed, then calls the PAL BatchMatMul for FLOAT32.
+{
+ auto lhs = x();
+ auto rhs = y();
+
+ bool adj_x = params().adj_x;
+ bool adj_y = params().adj_y;
+
+ auto orig_lhs_shape = getTensorShape(lhs);
+ auto orig_rhs_shape = getTensorShape(rhs);
+
+ auto rhs_tensor = adj_y ? rhs : temp_rhs(); // rhs is read from its transposed copy unless adj_y is set
+ auto lhs_tensor = adj_x ? temp_lhs() : lhs; // lhs is read from its transposed copy only when adj_x is set
+ if (not adj_y)
+ {
+ TransposeRowsColumns(rhs, temp_rhs());
+ }
+ if (adj_x)
+ {
+ TransposeRowsColumns(lhs, temp_lhs());
+ }
+ tflite::RuntimeShape rhs_shape = adj_y ? orig_rhs_shape : SwapRowColumnDims(orig_rhs_shape); // matches the layout of the data in rhs_tensor in both cases
+ tflite::RuntimeShape lhs_shape = adj_x ? orig_lhs_shape : SwapRowColumnDims(orig_lhs_shape); // row/column-swapped relative to the data in lhs_tensor; NOTE(review): presumably the convention the PAL routine expects - confirm in PALBatchMatMul.h
+
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ luci_interpreter_pal::BatchMatMul(rhs_shape, getTensorData<float>(rhs_tensor), lhs_shape, // rhs is passed before lhs
+ getTensorData<float>(lhs_tensor), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/BatchMatMul.h b/compiler/luci-interpreter/src/kernels/BatchMatMul.h
new file mode 100644
index 000000000..744f49795
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/BatchMatMul.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H
+#define LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class BatchMatMul : public KernelWithParams<BatchMatMulParams> // Kernel with inputs x and y, one output, and two temporary tensors.
+{
+public:
+ BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp, Tensor *y_tmp, // x_tmp / y_tmp are temporaries supplied by the caller
+ const BatchMatMulParams &params);
+
+ const Tensor *x() const { return _inputs[0]; } // first input operand
+ const Tensor *y() const { return _inputs[1]; } // second input operand
+ Tensor *output() const { return _outputs[0]; } // result tensor
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ Tensor *temp_lhs() const { return _outputs[1]; } // temporary stored in output slot 1
+ Tensor *temp_rhs() const { return _outputs[2]; } // temporary stored in output slot 2
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H
diff --git a/compiler/luci-interpreter/src/kernels/BatchMatMul.test.cpp b/compiler/luci-interpreter/src/kernels/BatchMatMul.test.cpp
new file mode 100644
index 000000000..edfa3a685
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/BatchMatMul.test.cpp
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchMatMul.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class BatchMatMulTest : public ::testing::Test // Fixture that owns the memory manager used by the BatchMatMul tests.
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } // installs a new TestMemoryManager each time SetUp runs
+
+ std::unique_ptr<IMemoryManager> _memory_manager; // passed to tensor factories and used for allocate_memory() in the tests
+};
+
+TEST_F(BatchMatMulTest, Float) // Multiplies a {1, 2, 3} tensor by a {1, 3, 4} tensor with both adjoint flags off.
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 3}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); // temporaries are created with an empty shape
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch); // buffers are allocated only after configure() has run
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.})); // e.g. 74 = 1*7 + 2*11 + 3*15
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
+}
+
+TEST_F(BatchMatMulTest, Float_SimpleRHSAdjoint) // Same product as the Float test, with rhs supplied as {1, 4, 3} and adj_y set.
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6};
+ std::vector<float> rhs_data = {7, 11, 15, 8, 12, 16, 9, 13, 17, 10, 14, 18}; // the 3x4 matrix 7..18 stored row/column-swapped
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 3}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 4, 3}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); // temporaries are created with an empty shape
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = true;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch); // buffers are allocated only after configure() has run
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.})); // identical to the non-adjoint Float expectation
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
+}
+
+TEST_F(BatchMatMulTest, Float_SimpleLHSAdjoint) // Same product as the Float test, with lhs supplied as {1, 3, 2} and adj_x set.
+{
+ std::vector<float> lhs_data = {1, 4, 2, 5, 3, 6}; // the 2x3 matrix 1..6 stored row/column-swapped
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 2}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); // temporaries are created with an empty shape
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = true;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch); // buffers are allocated only after configure() has run
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.})); // identical to the non-adjoint Float expectation
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
+}
+
+TEST_F(BatchMatMulTest, Float_BatchSizeTwo) // Both operands carry a leading batch dimension of 2.
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 2, 3}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); // temporaries are created with an empty shape
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch); // buffers are allocated only after configure() has run
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218., 560., 584., 608., 632., // first 8 values: batch 0, last 8: batch 1
+ 767., 800., 833., 866.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 4}));
+}
+
+TEST_F(BatchMatMulTest, Float_DiffBatch) // lhs has batch 2 while rhs has batch 1; the expected output has batch 2.
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 1, 6}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 6, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, ""); // temporaries are created with an empty shape
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch); // buffers are allocated only after configure() has run
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({427., 448., 469., 490., 1039., 1096., 1153., 1210.})); // e.g. 427 = 1*7 + 2*11 + 3*15 + 4*19 + 5*23 + 6*27
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 1, 4}));
+}
+
+TEST_F(BatchMatMulTest, Invalid_Shape_NEG) // Negative test: contraction sizes differ (lhs last dim 2, rhs second-to-last dim 3).
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 2}, {1, 2, 3, 4}, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 2}, {5, 6, 7, 8, 9, 10}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure()); // configure() is expected to reject the operands; execute() is never called
+}
+
+TEST_F(BatchMatMulTest, Invalid_Batch_NEG) // Negative test: batch dims 2 and 3 differ and neither is 1.
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 1, 3}, {1, 2, 3, 4, 5, 6}, _memory_manager.get());
+ Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({3, 3, 1}, {5, 6, 7, 8, 9, 10, 11, 12, 13},
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure()); // configure() is expected to reject the operands; execute() is never called
+}
+
+TEST_F(BatchMatMulTest, Invalid_Rank_NEG) // Negative test: lhs is rank 1.
+{
+ Tensor lhs_tensor = makeInputTensor<DataType::FLOAT32>({4}, {1, 2, 3, 4}, _memory_manager.get());
+ Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({1, 4, 2}, {5, 6, 7, 8, 9, 10, 11, 12},
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure()); // configure() is expected to reject the operands; execute() is never called
+}
+
+TEST_F(BatchMatMulTest, Invalid_Rank2_NEG) // Negative test: lhs is rank 5.
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 1, 4}, {1, 2, 3, 4}, _memory_manager.get());
+ Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({1, 4, 2}, {5, 6, 7, 8, 9, 10, 11, 12},
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure()); // configure() is expected to reject the operands; execute() is never called
+}
+
+TEST_F(BatchMatMulTest, TypeMisMatch_NEG) // Negative test: lhs is U8 while rhs is FLOAT32.
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 3}, {1, 2, 3, 4, 5, 6}, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 2}, {5, 6, 7, 8, 9, 10}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::U8, Shape({}), {}, ""); // scratch element type mirrors the U8 lhs operand
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure()); // configure() is expected to reject the operands; execute() is never called
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp b/compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp
index f3a344974..52647a763 100644
--- a/compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp
@@ -58,7 +58,7 @@ template <typename T> class BatchToSpaceNDTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(BatchToSpaceNDTest, DataTypes);
+TYPED_TEST_SUITE(BatchToSpaceNDTest, DataTypes);
TYPED_TEST(BatchToSpaceNDTest, Simple)
{
diff --git a/compiler/luci-interpreter/src/kernels/CMakeLists.txt b/compiler/luci-interpreter/src/kernels/CMakeLists.txt
index 1b7d0f66a..9f4ba0e0b 100644
--- a/compiler/luci-interpreter/src/kernels/CMakeLists.txt
+++ b/compiler/luci-interpreter/src/kernels/CMakeLists.txt
@@ -15,7 +15,9 @@ endmacro(REGISTER_KERNEL)
include(${KERNEL_REGISTER_FILE})
add_library(${LUCI_INTERPRETER_KERNELS} STATIC ${SOURCES})
-set_target_properties(${LUCI_INTERPRETER_KERNELS} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_KERNELS} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
target_include_directories(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR})
target_link_libraries(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_CORE})
diff --git a/compiler/luci-interpreter/src/kernels/Cast.test.cpp b/compiler/luci-interpreter/src/kernels/Cast.test.cpp
index 731260522..4713ad34c 100644
--- a/compiler/luci-interpreter/src/kernels/Cast.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Cast.test.cpp
@@ -79,7 +79,7 @@ template <typename T> class CastTest : public ::testing::Test
using IntDataTypes =
::testing::Types<uint8_t, uint16_t, uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t>;
-TYPED_TEST_CASE(CastTest, IntDataTypes);
+TYPED_TEST_SUITE(CastTest, IntDataTypes);
TYPED_TEST(CastTest, FloatToInt)
{
diff --git a/compiler/luci-interpreter/src/kernels/Concatenation.cpp b/compiler/luci-interpreter/src/kernels/Concatenation.cpp
index 7cfdf34b9..46ee5941e 100644
--- a/compiler/luci-interpreter/src/kernels/Concatenation.cpp
+++ b/compiler/luci-interpreter/src/kernels/Concatenation.cpp
@@ -69,11 +69,21 @@ void Concatenation::configure()
Shape output_shape = t0->shape();
output_shape.dim(axis) = sum_axis;
- // TODO S8 type needs more checking: quantization parameters of all input tensors and the output
- // tensor should be the same. Note that there is no such requirement for U8 type.
- if (t0->element_type() == DataType::S8)
- throw std::runtime_error("Unsupported type.");
+ // For every input tensor of INT8 type, its quantization parameters (quantized dimension, scales
+ // and zero points) must be the same as those of the output tensor.
+ for (auto current_tensor : _inputs)
+ {
+ if (current_tensor->element_type() == DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(current_tensor->quantized_dimension() ==
+ output()->quantized_dimension());
+ LUCI_INTERPRETER_CHECK(current_tensor->zero_points().size() ==
+ current_tensor->scales().size());
+ LUCI_INTERPRETER_CHECK(current_tensor->zero_points() == output()->zero_points());
+ LUCI_INTERPRETER_CHECK(current_tensor->scales() == output()->scales());
+ }
+ }
output()->resize(output_shape);
}
diff --git a/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp b/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp
index e4b50611a..f893b38fd 100644
--- a/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp
@@ -183,12 +183,12 @@ TEST_F(ConcatenationTest, Mismatching_Input_Dimension_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
-TEST_F(ConcatenationTest, Unsupported_Configure_Type_NEG)
+TEST_F(ConcatenationTest, Int8_Mismatching_Input_Type_NEG)
{
- std::vector<int8_t> input1_data{1, 2, 3, 4, 5, 6};
- std::vector<int8_t> input2_data{7, 8, 9, 10, 11, 12};
- Tensor input1_tensor = makeInputTensor<DataType::S8>({2, 3}, input1_data, _memory_manager.get());
- Tensor input2_tensor = makeInputTensor<DataType::S8>({2, 3}, input2_data, _memory_manager.get());
+ std::vector<uint8_t> input1_data{1, 2, 3, 4};
+ std::vector<int8_t> input2_data{5, 6, 7, 8};
+ Tensor input1_tensor = makeInputTensor<DataType::U8>({2, 2}, input1_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S8>({2, 2}, input2_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S8);
ConcatenationParams params{};
@@ -199,6 +199,51 @@ TEST_F(ConcatenationTest, Unsupported_Configure_Type_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
+TEST_F(ConcatenationTest, Int8_Mismatching_Input_Output_Quant_Params_NEG) // Negative test: configure() must throw.
+{
+ std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+ std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+ int quantized_dimension = 3;
+ std::vector<float> scales{0.1, 0.2, 0.3}; // three scales, one per index of dimension 3 in the {1, 1, 2, 3} shape below
+ std::vector<int32_t> zero_points{1, -1, 1};
+
+ Tensor input1_tensor = makeInputTensor<DataType::S8>( // both S8 inputs use the same scales / zero points / quantized dimension
+ {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input1_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S8>(
+ {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, scales.at(0), zero_points.at(0)); // output only gets the first scale / zero point; presumably this is the mismatch under test
+ ConcatenationParams params{};
+
+ params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure()); // the test passes only if configure() throws
+}
+
+TEST_F(ConcatenationTest, Int8_Mismatching_Zero_Point_NEG) // Negative test: configure() must throw.
+{
+ std::vector<float> input1_data{1, 2, 3, 4};
+ std::vector<float> input2_data{5, 6, 7, 8};
+ float scale = 0.1; // shared by both inputs and the output
+ int32_t zero_point_1 = 1; // used by input 1 and the output
+ int32_t zero_point_2 = -1; // used by input 2 only, so it differs from the output's zero point
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_1, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_2, input2_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::S8, scale, zero_point_1);
+ ConcatenationParams params{};
+
+ params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure()); // the test passes only if configure() throws
+}
+
// TODO: Remove this test when concat w/ fused_activation is supported
TEST_F(ConcatenationTest, With_Fused_Activation_NEG)
{
diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.cpp
index 5647f4c44..234f95425 100644
--- a/compiler/luci-interpreter/src/kernels/Conv2D.cpp
+++ b/compiler/luci-interpreter/src/kernels/Conv2D.cpp
@@ -30,8 +30,8 @@ namespace kernels
{
Conv2D::Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
- Tensor *im2col, const Conv2DParams &params)
- : KernelWithParams<Conv2DParams>({input, filter, bias}, {output, im2col}, params)
+ Tensor *scratchpad, const Conv2DParams &params)
+ : KernelWithParams<Conv2DParams>({input, filter, bias}, {output, scratchpad}, params)
{
}
@@ -108,27 +108,18 @@ void Conv2D::configure()
output()->resize({batches, output_height, output_width, output_depth});
- // Allocate tensor for Im2Col, if needed.
- // The checks here should be aligned with the actual implementation.
- const bool need_dilated_im2col =
- _params.dilation_height_factor != 1 || _params.dilation_width_factor != 1;
- const bool need_non_dilated_im2col = _params.stride_height != 1 || _params.stride_width != 1 ||
- filter_height != 1 || filter_width != 1;
- _need_im2col =
- input()->element_type() != DataType::S16 && (need_dilated_im2col || need_non_dilated_im2col);
- if (_need_im2col)
- {
- const int input_depth = input_shape.dim(3);
- Shape im2col_shape{batches, output_height, output_width,
- input_depth * filter_height * filter_width};
- auto im2col = getOutputTensors()[1];
- im2col->resize(im2col_shape);
- }
- else
- {
- auto im2col = getOutputTensors()[1];
- im2col->set_allocatable(false);
- }
+ // Allocate tensor for scratchpad, if needed.
+ tflite::ConvParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+ auto scratchpad = getOutputTensors()[1];
+ luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(), params,
+ getTensorShape(input()), getTensorShape(filter()),
+ getTensorShape(output()));
switch (_params.activation)
{
@@ -193,16 +184,16 @@ void Conv2D::evalFloat() const
params.float_activation_min = activation_min;
params.float_activation_max = activation_max;
- float *im2col_data = nullptr;
- auto im2col = getOutputTensors()[1];
- if (_need_im2col)
- {
- im2col_data = im2col->data<float>();
- }
- luci_interpreter_pal::Conv(
- params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
- getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
- getTensorShape(output()), getTensorData<float>(output()), getTensorShape(im2col), im2col_data);
+ auto scratchpad = getOutputTensors()[1];
+ float *scratchpad_data = nullptr;
+ if (scratchpad->is_allocatable())
+ scratchpad_data = scratchpad->data<float>();
+
+ luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(filter()), getTensorData<float>(filter()),
+ getTensorShape(bias()), getTensorData<float>(bias()),
+ getTensorShape(output()), getTensorData<float>(output()),
+ getTensorShape(scratchpad), scratchpad_data);
}
void Conv2D::evalQuantized() const
@@ -236,12 +227,12 @@ void Conv2D::evalQuantized() const
params.quantized_activation_min = activation_min;
params.quantized_activation_max = activation_max;
- auto im2col = getOutputTensors()[1];
+ auto scratchpad = getOutputTensors()[1];
luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
getTensorShape(filter()), getTensorData<uint8_t>(filter()),
getTensorShape(bias()), getTensorData<int32_t>(bias()),
getTensorShape(output()), getTensorData<uint8_t>(output()),
- getTensorShape(im2col), getTensorData<uint8_t>(im2col));
+ getTensorShape(scratchpad), getTensorData<uint8_t>(scratchpad));
}
void Conv2D::evalQuantizedPerChannel() const
@@ -364,18 +355,16 @@ void Conv2D::evalQuantizedS8PerChannel() const
std::back_inserter(multipliers),
[](ChannelQuantMultipliers cm) { return cm.multiplier; });
- int8_t *im2col_data = nullptr;
- auto im2col = getOutputTensors()[1];
- if (_need_im2col)
- {
- im2col_data = im2col->data<int8_t>();
- }
+ auto scratchpad = getOutputTensors()[1];
+ int8_t *scratchpad_data = nullptr;
+ if (scratchpad->is_allocatable())
+ scratchpad_data = scratchpad->data<int8_t>();
luci_interpreter_pal::ConvPerChannel(
params, multipliers.data(), shifts.data(), getTensorShape(input()),
getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()),
getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()),
- getTensorData<int8_t>(output()), getTensorShape(im2col), im2col_data);
+ getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
}
void Conv2D::evalQuantizedS16() const
diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.h b/compiler/luci-interpreter/src/kernels/Conv2D.h
index 5f1317638..330bf3a2a 100644
--- a/compiler/luci-interpreter/src/kernels/Conv2D.h
+++ b/compiler/luci-interpreter/src/kernels/Conv2D.h
@@ -31,7 +31,7 @@ class Conv2D : public KernelWithParams<Conv2DParams>
{
public:
Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
- Tensor *im2col, const Conv2DParams &params);
+ Tensor *scratchpad, const Conv2DParams &params);
const Tensor *input() const { return _inputs[0]; }
const Tensor *filter() const { return _inputs[1]; }
@@ -49,7 +49,6 @@ private:
void evalQuantizedS16() const;
private:
- bool _need_im2col = false;
int32_t _padding_height{};
int32_t _padding_width{};
};
diff --git a/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp b/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp
index 9b1c09ba9..88e6e07f1 100644
--- a/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp
@@ -32,7 +32,7 @@ template <typename T> class DepthToSpaceTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(DepthToSpaceTest, DataTypes);
+TYPED_TEST_SUITE(DepthToSpaceTest, DataTypes);
TYPED_TEST(DepthToSpaceTest, SimpleCase)
{
diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
index f2dbf6c68..c554c309d 100644
--- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
+++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
@@ -18,9 +18,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
-#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
-#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+#include "PALDepthwiseConv2d.h"
#include <stdexcept>
@@ -30,8 +28,9 @@ namespace kernels
{
DepthwiseConv2D::DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias,
- Tensor *output, const DepthwiseConv2DParams &params)
- : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output}, params)
+ Tensor *output, Tensor *scratchpad,
+ const DepthwiseConv2DParams &params)
+ : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output, scratchpad}, params)
{
}
@@ -109,6 +108,16 @@ void DepthwiseConv2D::configure()
filter_width, output_width);
output()->resize({batches, output_height, output_width, channels_out});
+
+ tflite::DepthwiseParams params{};
+
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+
+ auto scratchpad = getOutputTensors()[1];
+ luci_interpreter_pal::SetupScratchpadTensor(scratchpad, params, input()->element_type(),
+ getTensorShape(input()), getTensorShape(filter()),
+ getTensorShape(output()));
}
void DepthwiseConv2D::execute() const
@@ -337,11 +346,16 @@ void DepthwiseConv2D::evalQuantizedS8PerChannel() const
std::back_inserter(multipliers),
[](ChannelQuantMultipliers cm) { return cm.multiplier; });
- tflite::reference_integer_ops::DepthwiseConvPerChannel(
+ auto scratchpad = getOutputTensors()[1];
+ int8_t *scratchpad_data = nullptr;
+ if (scratchpad->is_allocatable())
+ scratchpad_data = scratchpad->data<int8_t>();
+
+ luci_interpreter_pal::DepthwiseConvPerChannel<int8_t>(
params, multipliers.data(), shifts.data(), getTensorShape(input()),
getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()),
getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()),
- getTensorData<int8_t>(output()));
+ getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
}
void DepthwiseConv2D::evalQuantizedS16() const
diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h
index 6cffd6583..3d1faf6c1 100644
--- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h
+++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h
@@ -29,7 +29,7 @@ class DepthwiseConv2D : public KernelWithParams<DepthwiseConv2DParams>
{
public:
DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
- const DepthwiseConv2DParams &params);
+ Tensor *scratchpad, const DepthwiseConv2DParams &params);
const Tensor *input() const { return _inputs[0]; }
const Tensor *filter() const { return _inputs[1]; }
diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp
index 74975899a..6b4673f3e 100644
--- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp
@@ -59,6 +59,7 @@ TEST_F(DepthwiseConv2DTest, Float)
makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
Tensor bias_tensor =
makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
DepthwiseConv2DParams params{};
@@ -70,8 +71,10 @@ TEST_F(DepthwiseConv2DTest, Float)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
_memory_manager->allocate_memory(output_tensor);
kernel.execute();
@@ -111,6 +114,7 @@ TEST_F(DepthwiseConv2DTest, Uint8)
{4}, input_quant_param.first * input_quant_param.first, 0, bias_data, _memory_manager.get());
Tensor output_tensor =
makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
DepthwiseConv2DParams params{};
params.padding = Padding::VALID;
@@ -121,9 +125,11 @@ TEST_F(DepthwiseConv2DTest, Uint8)
params.dilation_width_factor = 1;
params.activation = Activation::NONE;
- DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
kernel.configure();
_memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
kernel.execute();
std::vector<float> ref_output_data{
@@ -166,6 +172,7 @@ TEST_F(DepthwiseConv2DTest, SInt16)
Tensor bias_tensor =
makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+ Tensor scratchpad(DataType::S64, Shape({}), {}, "");
DepthwiseConv2DParams params{};
params.padding = Padding::VALID;
@@ -176,9 +183,11 @@ TEST_F(DepthwiseConv2DTest, SInt16)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
kernel.configure();
_memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
@@ -224,6 +233,7 @@ TEST_F(DepthwiseConv2DTest, SInt16_CWQ_weights)
Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
_memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+ Tensor scratchpad(DataType::S16, Shape({}), {}, "");
DepthwiseConv2DParams params{};
params.padding = Padding::VALID;
@@ -234,9 +244,11 @@ TEST_F(DepthwiseConv2DTest, SInt16_CWQ_weights)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
kernel.configure();
_memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
@@ -299,6 +311,7 @@ TEST_F(DepthwiseConv2DTest, Uint8_CWQ_weights)
_memory_manager.get());
Tensor output_tensor =
makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
DepthwiseConv2DParams params{};
params.padding = Padding::VALID;
@@ -309,9 +322,11 @@ TEST_F(DepthwiseConv2DTest, Uint8_CWQ_weights)
params.dilation_width_factor = 1;
params.activation = Activation::NONE;
- DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
kernel.configure();
_memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
@@ -375,6 +390,7 @@ TEST_F(DepthwiseConv2DTest, SInt8_CWQ_weights)
_memory_manager.get());
Tensor output_tensor =
makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+ Tensor scratchpad(DataType::S8, Shape({}), {}, "");
DepthwiseConv2DParams params{};
params.padding = Padding::VALID;
@@ -385,9 +401,11 @@ TEST_F(DepthwiseConv2DTest, SInt8_CWQ_weights)
params.dilation_width_factor = 1;
params.activation = Activation::NONE;
- DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
kernel.configure();
_memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
kernel.execute();
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
@@ -419,6 +437,7 @@ TEST_F(DepthwiseConv2DTest, InvalidBiasType_NEG)
makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
DepthwiseConv2DParams params{};
params.padding = Padding::VALID;
@@ -429,7 +448,8 @@ TEST_F(DepthwiseConv2DTest, InvalidBiasType_NEG)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
EXPECT_ANY_THROW(kernel.configure());
}
@@ -458,6 +478,7 @@ TEST_F(DepthwiseConv2DTest, InOutTypeMismatch_NEG)
Tensor bias_tensor =
makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::U8);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
DepthwiseConv2DParams params{};
params.padding = Padding::VALID;
@@ -468,7 +489,8 @@ TEST_F(DepthwiseConv2DTest, InOutTypeMismatch_NEG)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
EXPECT_ANY_THROW(kernel.configure());
}
@@ -497,6 +519,7 @@ TEST_F(DepthwiseConv2DTest, InvalidInputShape_NEG)
Tensor bias_tensor =
makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
DepthwiseConv2DParams params{};
params.padding = Padding::VALID;
@@ -507,7 +530,8 @@ TEST_F(DepthwiseConv2DTest, InvalidInputShape_NEG)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
EXPECT_ANY_THROW(kernel.configure());
}
@@ -536,6 +560,7 @@ TEST_F(DepthwiseConv2DTest, InvalidFilterShape_NEG)
Tensor bias_tensor =
makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
DepthwiseConv2DParams params{};
params.padding = Padding::VALID;
@@ -546,7 +571,8 @@ TEST_F(DepthwiseConv2DTest, InvalidFilterShape_NEG)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
EXPECT_ANY_THROW(kernel.configure());
}
@@ -575,6 +601,7 @@ TEST_F(DepthwiseConv2DTest, InvalidBiasDim_NEG)
Tensor bias_tensor =
makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
DepthwiseConv2DParams params{};
params.padding = Padding::VALID;
@@ -585,7 +612,8 @@ TEST_F(DepthwiseConv2DTest, InvalidBiasDim_NEG)
params.dilation_width_factor = 1;
params.activation = Activation::RELU;
- DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
EXPECT_ANY_THROW(kernel.configure());
}
diff --git a/compiler/luci-interpreter/src/kernels/Dequantize.cpp b/compiler/luci-interpreter/src/kernels/Dequantize.cpp
new file mode 100644
index 000000000..96399e5c7
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Dequantize.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Dequantize.h"
+#include "kernels/Utils.h"
+#include "PALDequantize.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Dequantize::Dequantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} // Passes the one input and one output tensor on to the Kernel base class.
+
+void Dequantize::configure() // Checks the input/output element types and the input quantization, then resizes the output.
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::S8 || // the input must be S8, U8 or S16
+ input()->element_type() == loco::DataType::U8 ||
+ input()->element_type() == loco::DataType::S16);
+
+ LUCI_INTERPRETER_CHECK(input()->scales().size() == 1); // exactly one scale is required on the input
+
+ if (input()->element_type() == loco::DataType::S16)
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0); // an S16 input must have a zero point of 0
+
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::FLOAT32); // the output must be FLOAT32
+
+ output()->resize(input()->shape()); // the output takes the shape of the input
+}
+
+void Dequantize::execute() const // Runs luci_interpreter_pal::Dequantize on the input, writing float data to the output.
+{
+ tflite::DequantizationParams op_params;
+ op_params.zero_point = input()->zero_point(); // both quantization parameters come from the input tensor
+ op_params.scale = input()->scale();
+
+ switch (input()->element_type()) // the cases differ only in the element type used to read the input data
+ {
+ case loco::DataType::U8:
+ {
+ luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ }
+ case loco::DataType::S8:
+ {
+ luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+ getTensorData<int8_t>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ }
+ case loco::DataType::S16:
+ {
+ luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+ getTensorData<int16_t>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type."); // any input element type other than U8, S8 or S16
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Dequantize.h b/compiler/luci-interpreter/src/kernels/Dequantize.h
new file mode 100644
index 000000000..5565df0e4
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Dequantize.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H
+#define LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Dequantize : public Kernel // Kernel constructed from one input tensor and one output tensor.
+{
+public:
+ Dequantize(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; } // first entry of _inputs
+ Tensor *output() const { return _outputs[0]; } // first entry of _outputs
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H
diff --git a/compiler/luci-interpreter/src/kernels/Dequantize.test.cpp b/compiler/luci-interpreter/src/kernels/Dequantize.test.cpp
new file mode 100644
index 000000000..0cab633d6
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Dequantize.test.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Dequantize.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class DequantizeTest : public ::testing::Test // Test fixture whose SetUp() creates the memory manager used by the tests.
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager; // assigned a TestMemoryManager in SetUp()
+};
+
+TEST_F(DequantizeTest, Uint8) // Checks the dequantized values and output shape for a U8 input.
+{
+ std::vector<uint8_t> input_data{0, 1, 2, 3, 4, 251, 252, 253, 254, 255};
+
+ std::vector<float> ref_output_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64}; // each value equals (q - 127) * 0.5 for the matching input q
+
+ Tensor input_tensor(loco::DataType::U8, {2, 5}, {{0.5}, {127}}, ""); // quantization {{0.5}, {127}}: presumably scale 0.5 and zero point 127, which matches the reference data
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(uint8_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor); // allocated after configure(), which resizes the output
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(DequantizeTest, Sint8) // Checks the dequantized values and output shape for an S8 input.
+{
+ std::vector<int8_t> input_data{-128, -127, -126, -125, -124, 123, 124, 125, 126, 127};
+
+ std::vector<float> ref_output_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64}; // each value equals (q + 1) * 0.5 for the matching input q
+
+ Tensor input_tensor(loco::DataType::S8, {2, 5}, {{0.5}, {-1}}, ""); // quantization {{0.5}, {-1}}: presumably scale 0.5 and zero point -1, which matches the reference data
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int8_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor); // allocated after configure(), which resizes the output
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(DequantizeTest, Sint16) // Checks the dequantized values and output shape for an S16 input.
+{
+ std::vector<int16_t> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131};
+
+ std::vector<float> ref_output_data{-64.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 65.5}; // each value equals q * 0.5 for the matching input q
+
+ Tensor input_tensor(loco::DataType::S16, {2, 5}, {{0.5}, {0}}, ""); // quantization {{0.5}, {0}}: presumably scale 0.5 and zero point 0, which matches the reference data
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int16_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor); // allocated after configure(), which resizes the output
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(DequantizeTest, InvalidInputType_NEG) // Negative test: a FLOAT32 input must make configure() throw.
+{
+ std::vector<float> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get()); // FLOAT32 is the input type under test
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure()); // the test passes only if configure() throws
+}
+
+TEST_F(DequantizeTest, InvalidOutputType_NEG) // Negative test: an S8 output must make configure() throw.
+{
+ std::vector<int16_t> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131};
+
+ Tensor input_tensor(loco::DataType::S16, {2, 5}, {{0.5}, {0}}, ""); // same S16 input construction as the Sint16 test
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int16_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1); // S8 is the output type under test
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure()); // the test passes only if configure() throws
+}
+
+TEST_F(DequantizeTest, InvalidInputZeroPoint_NEG) // Negative test: an S16 input with a non-zero zero point must make configure() throw.
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({2, 5}, 0.5, -1, input_data, _memory_manager.get()); // presumably scale 0.5 and zero point -1 — confirm against makeInputTensor's signature
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure()); // the test passes only if configure() throws
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Div.cpp b/compiler/luci-interpreter/src/kernels/Div.cpp
index 0e52ba1f0..dd1532278 100644
--- a/compiler/luci-interpreter/src/kernels/Div.cpp
+++ b/compiler/luci-interpreter/src/kernels/Div.cpp
@@ -46,6 +46,12 @@ void Div::execute() const
case DataType::FLOAT32:
evalFloat();
break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
case DataType::U8:
evalQuantized();
break;
@@ -56,13 +62,9 @@ void Div::execute() const
void Div::evalFloat() const
{
- float activation_min{};
- float activation_max{};
- calculateActivationRange(_params.activation, &activation_min, &activation_max);
-
tflite::ArithmeticParams params{};
- params.float_activation_min = activation_min;
- params.float_activation_max = activation_max;
+ fillArithmeticActivationRange<float>(params, _params.activation);
+
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
getTensorShape(input1()), getTensorShape(input2()), &params);
@@ -80,6 +82,28 @@ void Div::evalFloat() const
}
}
+// Element-wise division for integer tensors of element type T.
+// The activation range from _params is applied through ArithmeticParams, and the
+// broadcasting implementation is chosen when ProcessBroadcastShapes() asks for it.
+// NOTE(review): no guard against zero divisors is visible in this function -
+// confirm how integer division by zero is meant to be handled.
+template <typename T> void Div::evalInteger() const
+{
+  const auto lhs_shape = getTensorShape(input1());
+  const auto rhs_shape = getTensorShape(input2());
+  const auto out_shape = getTensorShape(output());
+
+  const auto lhs_data = getTensorData<T>(input1());
+  const auto rhs_data = getTensorData<T>(input2());
+  auto out_data = getTensorData<T>(output());
+
+  tflite::ArithmeticParams params{};
+  fillArithmeticActivationRange<T>(params, _params.activation);
+
+  if (tflite::reference_ops::ProcessBroadcastShapes(lhs_shape, rhs_shape, &params))
+  {
+    tflite::reference_ops::BroadcastDivSlow(params, lhs_shape, lhs_data, rhs_shape, rhs_data,
+                                            out_shape, out_data);
+    return;
+  }
+
+  tflite::reference_ops::Div(params, lhs_shape, lhs_data, rhs_shape, rhs_data, out_shape,
+                             out_data);
+}
+
void Div::evalQuantized() const
{
const auto input1_scale = static_cast<double>(input1()->scale());
diff --git a/compiler/luci-interpreter/src/kernels/Div.h b/compiler/luci-interpreter/src/kernels/Div.h
index 6040cdd02..c1bf3e10b 100644
--- a/compiler/luci-interpreter/src/kernels/Div.h
+++ b/compiler/luci-interpreter/src/kernels/Div.h
@@ -39,6 +39,7 @@ public:
private:
void evalFloat() const;
+ template <typename T> void evalInteger() const;
void evalQuantized() const;
};
diff --git a/compiler/luci-interpreter/src/kernels/Div.test.cpp b/compiler/luci-interpreter/src/kernels/Div.test.cpp
index 021d68d06..85cd8b90a 100644
--- a/compiler/luci-interpreter/src/kernels/Div.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Div.test.cpp
@@ -134,6 +134,56 @@ TEST_F(DivTest, Uint8)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
}
+// Shared body of the integer Div tests, instantiated once per element type.
+// Divides a fixed {2, 3, 1, 2} tensor by the same six divisor values arranged in
+// four different shapes and compares each flattened result with a reference table.
+// `memory_manager` is used to allocate the input and output tensors.
+template <loco::DataType DType> void checkInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ Shape base_shape = {2, 3, 1, 2};
+ // Shapes given to the second operand; each holds six elements.
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+
+ // test_outputs[i] is the expected flattened output for test_shapes[i].
+ // RELU is requested below and every expected value here is non-negative.
+ std::vector<std::vector<dtype>> test_outputs = {{5, 6, 2, 0, 10, 3, //
+ 10, 0, 4, 5, 20, 0, //
+ 0, 0, 0, 2, 0, 0, //
+ 2, 0, 1, 10, 5, 0, //
+ 2, 3, 1, 0, 5, 1, //
+ 18, 20, 7, 0, 37, 10},
+ {5, 6, 4, 5, 0, 0, 2, 0, 1, 0, 37, 10},
+ {5, 7, 4, 6, 2, 3, 10, 0, 8, 0, 4, 0,
+ 0, 0, 0, 0, 0, 0, 0, 10, 5, 0, 1, 0,
+ 0, 0, 5, 9, 1, 1, 0, 0, 37, 50, 7, 10},
+ {5, 7, 8, 0, 0, 0, 0, 10, 5, 9, 7, 10}};
+ std::vector<dtype> input1_data{20, 30, 40, -17, -4, -7, 11, -31, 10, 19, 75, 100};
+ std::vector<dtype> input2_data{4, 5, 10, -3, 2, 10};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ // The output is allocated only after configure() has run.
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ // The streamed index identifies the failing shape in the test log.
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+}
+
+// Runs the shared integer Div checks on S64 tensors.
+TEST_F(DivTest, SInt64)
+{
+  auto *const memory_manager = _memory_manager.get();
+  checkInteger<loco::DataType::S64>(memory_manager);
+  SUCCEED();
+}
+
+// Runs the shared integer Div checks on S32 tensors.
+TEST_F(DivTest, SInt32)
+{
+  auto *const memory_manager = _memory_manager.get();
+  checkInteger<loco::DataType::S32>(memory_manager);
+  SUCCEED();
+}
+
TEST_F(DivTest, Input_Output_Type_NEG)
{
Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
@@ -149,9 +199,9 @@ TEST_F(DivTest, Input_Output_Type_NEG)
TEST_F(DivTest, Invalid_Input_Type_NEG)
{
- Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
- Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
- Tensor output_tensor = makeOutputTensor(DataType::S64);
+ Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U64);
DivParams params{};
params.activation = Activation::RELU;
@@ -162,6 +212,19 @@ TEST_F(DivTest, Invalid_Input_Type_NEG)
EXPECT_ANY_THROW(kernel.execute());
}
+// Negative case: S32 inputs with an S64 output tensor; configure() must throw.
+TEST_F(DivTest, Invalid_Output_Type_NEG)
+{
+  auto *const memory_manager = _memory_manager.get();
+
+  Tensor lhs = makeInputTensor<DataType::S32>({1}, {1}, memory_manager);
+  Tensor rhs = makeInputTensor<DataType::S32>({1}, {2}, memory_manager);
+  Tensor result = makeOutputTensor(DataType::S64);
+
+  DivParams div_params{};
+  div_params.activation = Activation::RELU;
+
+  Div kernel(&lhs, &rhs, &result, div_params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Equal.cpp b/compiler/luci-interpreter/src/kernels/Equal.cpp
index f58de1250..a57e127b7 100644
--- a/compiler/luci-interpreter/src/kernels/Equal.cpp
+++ b/compiler/luci-interpreter/src/kernels/Equal.cpp
@@ -49,6 +49,12 @@ void Equal::execute() const
case DataType::FLOAT32:
evalFloat();
break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
case DataType::U8:
evalQuantized();
break;
@@ -79,6 +85,29 @@ void Equal::evalFloat() const
}
}
+// Element-wise x == y for integer tensors of element type T, written to a bool output.
+// The broadcasting implementation is used whenever the two input shapes differ.
+template <typename T> void Equal::evalInteger() const
+{
+  tflite::ComparisonParams op_params;
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  const auto lhs_shape = getTensorShape(x());
+  const auto rhs_shape = getTensorShape(y());
+  const auto out_shape = getTensorShape(output());
+
+  const auto lhs_data = getTensorData<T>(x());
+  const auto rhs_data = getTensorData<T>(y());
+  auto out_data = getTensorData<bool>(output());
+
+  if (!op_params.is_broadcast)
+  {
+    tflite::reference_ops::EqualNoScaling(op_params, lhs_shape, lhs_data, rhs_shape, rhs_data,
+                                          out_shape, out_data);
+    return;
+  }
+
+  tflite::reference_ops::Broadcast4DSlowEqualNoScaling(op_params, lhs_shape, lhs_data, rhs_shape,
+                                                       rhs_data, out_shape, out_data);
+}
+
void Equal::evalQuantized() const
{
const auto x_data = getTensorData<uint8_t>(x());
diff --git a/compiler/luci-interpreter/src/kernels/Equal.h b/compiler/luci-interpreter/src/kernels/Equal.h
index 11f025eac..c9be32cc0 100644
--- a/compiler/luci-interpreter/src/kernels/Equal.h
+++ b/compiler/luci-interpreter/src/kernels/Equal.h
@@ -38,6 +38,7 @@ public:
private:
void evalFloat() const;
+ template <typename T> void evalInteger() const;
void evalQuantized() const;
private:
diff --git a/compiler/luci-interpreter/src/kernels/Equal.test.cpp b/compiler/luci-interpreter/src/kernels/Equal.test.cpp
index 46a0f97d8..5870e5460 100644
--- a/compiler/luci-interpreter/src/kernels/Equal.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Equal.test.cpp
@@ -99,6 +99,82 @@ TEST_F(EqualTest, FloatBroardcast)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
}
+// Shared body of the same-shape integer Equal tests.
+// Compares two three-element tensors that include the minimum and maximum values
+// of the element type and checks both the bool result and the output shape.
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  using limits = std::numeric_limits<dtype>;
+
+  const dtype lowest = limits::min();
+  const dtype highest = limits::max();
+
+  const std::vector<dtype> lhs{lowest, 2, highest};
+  const std::vector<dtype> rhs{lowest, -2, highest};
+  const std::vector<bool> expected{true, false, true};
+
+  Tensor x_tensor = makeInputTensor<DType>({3}, lhs, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, rhs, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  // The output is allocated only after configure() has run.
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(expected));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+// Shared body of the broadcasting integer Equal tests.
+// A {4, 3} tensor is compared against a {3} tensor; the result is expected to be
+// a {4, 3} bool tensor.
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  using limits = std::numeric_limits<dtype>;
+
+  const dtype lowest = limits::min();
+  const dtype highest = limits::max();
+
+  const std::vector<dtype> lhs{
+    lowest, 2,  3,       // Row 1
+    4,      5,  highest, // Row 2
+    -1,     -2, -3,      // Row 3
+    lowest, -2, highest, // Row 4
+  };
+
+  const std::vector<dtype> rhs{
+    lowest, -2, highest, // Row 1
+  };
+
+  const std::vector<bool> expected{
+    true,  false, false, // Row 1
+    false, false, true,  // Row 2
+    false, true,  false, // Row 3
+    true,  true,  true,  // Row 4
+  };
+
+  Tensor x_tensor = makeInputTensor<DType>({4, 3}, lhs, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, rhs, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+  kernel.configure();
+  // The output is allocated only after configure() has run.
+  memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(expected));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+// Runs the same-shape and broadcasting Equal checks on S32 tensors.
+TEST_F(EqualTest, Int32)
+{
+  auto *const memory_manager = _memory_manager.get();
+  checkIntegerSimple<loco::DataType::S32>(memory_manager);
+  checkIntegerBroadcast<loco::DataType::S32>(memory_manager);
+  SUCCEED();
+}
+
+// Runs the same-shape and broadcasting Equal checks on S64 tensors.
+TEST_F(EqualTest, Int64)
+{
+  auto *const memory_manager = _memory_manager.get();
+  checkIntegerSimple<loco::DataType::S64>(memory_manager);
+  checkIntegerBroadcast<loco::DataType::S64>(memory_manager);
+  SUCCEED();
+}
+
// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
const float F_MIN = -128.0 / 128.0;
const float F_MAX = 127.0 / 128.0;
@@ -195,6 +271,36 @@ TEST_F(EqualTest, Input_Output_Type_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
+// Negative case: FLOAT32 inputs of shapes {2} and {3}; configure() must throw.
+TEST_F(EqualTest, Float_Broadcast_NEG)
+{
+  auto *const memory_manager = _memory_manager.get();
+
+  Tensor lhs = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, memory_manager);
+  Tensor rhs = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, memory_manager);
+  Tensor result = makeOutputTensor(DataType::BOOL);
+
+  Equal kernel(&lhs, &rhs, &result);
+  ASSERT_ANY_THROW(kernel.configure());
+}
+
+// Negative case: S32 inputs of shapes {2} and {3}; configure() must throw.
+TEST_F(EqualTest, Int32_Broadcast_NEG)
+{
+  auto *const memory_manager = _memory_manager.get();
+
+  Tensor lhs = makeInputTensor<DataType::S32>({2}, {1, 2}, memory_manager);
+  Tensor rhs = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, memory_manager);
+  Tensor result = makeOutputTensor(DataType::BOOL);
+
+  Equal kernel(&lhs, &rhs, &result);
+  ASSERT_ANY_THROW(kernel.configure());
+}
+
+// Negative case: S64 inputs of shapes {2} and {3}; configure() must throw.
+TEST_F(EqualTest, Int64_Broadcast_NEG)
+{
+  auto *const memory_manager = _memory_manager.get();
+
+  Tensor lhs = makeInputTensor<DataType::S64>({2}, {1, 2}, memory_manager);
+  Tensor rhs = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, memory_manager);
+  Tensor result = makeOutputTensor(DataType::BOOL);
+
+  Equal kernel(&lhs, &rhs, &result);
+  ASSERT_ANY_THROW(kernel.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/ExpandDims.cpp b/compiler/luci-interpreter/src/kernels/ExpandDims.cpp
new file mode 100644
index 000000000..ba35c99fa
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ExpandDims.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ExpandDims.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Registers `input` and `axis` (in that order) as the kernel inputs and `output`
+// as its single output. No validation happens here; see configure().
+ExpandDims::ExpandDims(const Tensor *input, const Tensor *axis, Tensor *output)
+ : Kernel({input, axis}, {output})
+{
+}
+
+// Computes the output shape: the input shape with one extra dimension of size 1
+// inserted at the position given by the axis tensor, then resizes the output.
+//
+// The axis tensor must hold exactly one S32 or S64 value. A negative value counts
+// back from the end of the output shape, so with an input of rank R the accepted
+// range is [-(R + 1), R].
+//
+// Throws std::runtime_error for an unsupported axis element type; the remaining
+// constraints are enforced through LUCI_INTERPRETER_CHECK.
+void ExpandDims::configure()
+{
+  // The axis value is dereferenced below, so make sure there is exactly one.
+  LUCI_INTERPRETER_CHECK(axis()->shape().num_elements() == 1);
+
+  // Keep the value in 64 bits until it has been range-checked: narrowing an S64
+  // axis first could wrap an out-of-range value into the valid range.
+  int64_t axis_value = 0;
+
+  switch (axis()->element_type())
+  {
+    case loco::DataType::S32:
+      axis_value = *getTensorData<int32_t>(axis());
+      break;
+    case loco::DataType::S64:
+      axis_value = *getTensorData<int64_t>(axis());
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+
+  const auto input_shape = input()->shape();
+  const int32_t output_rank = input_shape.num_dims() + 1;
+
+  if (axis_value < 0)
+  {
+    axis_value += output_rank;
+  }
+
+  LUCI_INTERPRETER_CHECK(axis_value >= 0 and axis_value < output_rank);
+
+  // Safe to narrow: the value is now known to lie in [0, output_rank).
+  const auto new_dim = static_cast<int32_t>(axis_value);
+
+  Shape output_shape(output_rank);
+  for (int32_t i = 0; i < output_rank; ++i)
+  {
+    if (i < new_dim)
+    {
+      output_shape.dim(i) = input_shape.dim(i);
+    }
+    else if (i == new_dim)
+    {
+      output_shape.dim(i) = 1;
+    }
+    else
+    {
+      // Here i > new_dim >= 0, so i - 1 is a valid input dimension index.
+      output_shape.dim(i) = input_shape.dim(i - 1);
+    }
+  }
+
+  output()->resize(output_shape);
+}
+
+// Copies the input buffer to the output buffer unchanged; only the shape set up
+// by configure() differs between the two tensors.
+void ExpandDims::execute() const
+{
+  const auto element_count = static_cast<size_t>(input()->shape().num_elements());
+  const size_t byte_count = element_count * getDataTypeSize(input()->element_type());
+
+  std::memcpy(output()->data<void>(), input()->data<void>(), byte_count);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/ExpandDims.h b/compiler/luci-interpreter/src/kernels/ExpandDims.h
new file mode 100644
index 000000000..e510b1160
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ExpandDims.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
+#define LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel that copies `input` to `output` with one extra dimension of size 1.
+// The position of the new dimension is read from the `axis` tensor.
+class ExpandDims : public Kernel
+{
+public:
+ // `input` and `axis` become inputs 0 and 1; `output` becomes output 0.
+ ExpandDims(const Tensor *input, const Tensor *axis, Tensor *output);
+
+ // Accessors for the tensors registered by the constructor.
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *axis() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ // Validates the axis and resizes the output tensor.
+ void configure() override;
+ // Copies the input data into the output tensor.
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
diff --git a/compiler/luci-interpreter/src/kernels/ExpandDims.test.cpp b/compiler/luci-interpreter/src/kernels/ExpandDims.test.cpp
new file mode 100644
index 000000000..df9eaccc0
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ExpandDims.test.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ExpandDims.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Test fixture that creates a TestMemoryManager in SetUp() for the ExpandDims tests.
+class ExpandDimsTest : public ::testing::Test
+{
+protected:
+ // Replaces the memory manager with a newly constructed TestMemoryManager.
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ // Passed to makeInputTensor() and used to allocate output tensors in the tests.
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Axis 0 on a {2, 2} input: data is unchanged and the shape becomes {1, 2, 2}.
+TEST_F(ExpandDimsTest, PositiveAxis)
+{
+  const std::vector<int32_t> values{-1, 1, -2, 2};
+  const Shape values_shape{2, 2};
+  const std::vector<int32_t> axis_data{0};
+
+  Tensor input_tensor = makeInputTensor<DataType::S32>(values_shape, values, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+  kernel.configure();
+  // The output is allocated only after configure() has run.
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(values));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2}));
+}
+
+// Axis -1 on a {2, 2} input: data is unchanged and the shape becomes {2, 2, 1}.
+TEST_F(ExpandDimsTest, NegAxis)
+{
+  const std::vector<int32_t> values{-1, 1, -2, 2};
+  const Shape values_shape{2, 2};
+  const std::vector<int32_t> axis_data{-1};
+
+  Tensor input_tensor = makeInputTensor<DataType::S32>(values_shape, values, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+  kernel.configure();
+  // The output is allocated only after configure() has run.
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(values));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 1}));
+}
+
+// Negative case: the axis tensor is FLOAT32 and configure() must throw.
+TEST_F(ExpandDimsTest, InvalidAxisType_NEG)
+{
+  const std::vector<int32_t> values{-1, 1, -2, 2};
+  const Shape values_shape{2, 2};
+  const std::vector<float> axis_data{1.0f};
+
+  Tensor input_tensor = makeInputTensor<DataType::S32>(values_shape, values, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::FLOAT32>({1}, axis_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative case: axis 3 is past the end for a rank-2 input and configure() must throw.
+TEST_F(ExpandDimsTest, InvalidAxisValue_NEG)
+{
+  const std::vector<int32_t> values{-1, 1, -2, 2};
+  const Shape values_shape{2, 2};
+  const std::vector<int32_t> axis_data{3};
+
+  Tensor input_tensor = makeInputTensor<DataType::S32>(values_shape, values, _memory_manager.get());
+  Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+  ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/FullyConnected.cpp b/compiler/luci-interpreter/src/kernels/FullyConnected.cpp
index cfe8f8bf2..bd2bb2f35 100644
--- a/compiler/luci-interpreter/src/kernels/FullyConnected.cpp
+++ b/compiler/luci-interpreter/src/kernels/FullyConnected.cpp
@@ -18,8 +18,7 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
-#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
+#include "PALFullyConnected.h"
#include <stdexcept>
@@ -74,7 +73,18 @@ void FullyConnected::configure()
if (bias())
LUCI_INTERPRETER_CHECK(bias()->shape().num_elements() == weights()->shape().dim(0));
- output()->resize({batch_size, num_units});
+ if (params().keep_num_dims == false)
+ {
+ output()->resize({batch_size, num_units});
+ }
+ else
+ {
+ luci_interpreter::Shape output_shape(input_shape.num_dims());
+ for (int i = 0; i < input_shape.num_dims(); ++i)
+ output_shape.dim(i) = input_shape.dim(i);
+ output_shape.dim(input_shape.num_dims() - 1) = num_units;
+ output()->resize(output_shape);
+ }
}
void FullyConnected::execute() const
@@ -172,7 +182,7 @@ void FullyConnected::evalQuantizedS8() const
op_params.quantized_activation_max = output_activation_max;
op_params.lhs_cacheable = false;
op_params.rhs_cacheable = false;
- tflite::reference_integer_ops::FullyConnected(
+ luci_interpreter_pal::FullyConnected<int8_t>(
op_params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(weights()),
getTensorData<int8_t>(weights()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
getTensorShape(output()), getTensorData<int8_t>(output()));
diff --git a/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp b/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp
index b0eda0145..4474cc4fb 100644
--- a/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp
@@ -133,7 +133,7 @@ template <typename T> class FullyConnectedTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
-TYPED_TEST_CASE(FullyConnectedTest, DataTypes);
+TYPED_TEST_SUITE(FullyConnectedTest, DataTypes);
TYPED_TEST(FullyConnectedTest, Simple)
{
diff --git a/compiler/luci-interpreter/src/kernels/Gather.cpp b/compiler/luci-interpreter/src/kernels/Gather.cpp
new file mode 100644
index 000000000..f1256660f
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Gather.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Gather.h"
+#include "kernels/Utils.h"
+#include "PALGather.h"
+
+#include <stdexcept>
+#include <cassert>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Registers `params` and `indices` (in that order) as the kernel inputs, `output`
+// as its single output, and hands `gparams` to KernelWithParams.
+// No validation happens here; see configure().
+Gather::Gather(const Tensor *params, const Tensor *indices, Tensor *output,
+ const GatherParams &gparams)
+ : KernelWithParams<GatherParams>({params, indices}, {output}, gparams)
+{
+}
+
+// Validates element types, axis and batch_dims, then resizes the output.
+// The checks and the shape computation follow tensorflow/lite/kernels/gather.cc.
+//
+// Output shape = params dims before `axis`, followed by indices dims from
+// `batch_dims` onwards, followed by params dims after `axis`.
+//
+// Throws std::runtime_error when params is not FLOAT32; the remaining constraints
+// are enforced through LUCI_INTERPRETER_CHECK.
+void Gather::configure()
+{
+  // Only FLOAT32 params are handled, and the output has to match.
+  if (params()->element_type() != DataType::FLOAT32)
+  {
+    throw std::runtime_error("Unsupported type.");
+  }
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+
+  LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32 ||
+                         indices()->element_type() == DataType::S64);
+
+  const Shape &params_shape = params()->shape();
+  const Shape &indices_shape = indices()->shape();
+
+  // A negative axis is offset by the rank of params.
+  const int axis = _params.axis < 0 ? _params.axis + params_shape.num_dims() : _params.axis;
+  LUCI_INTERPRETER_CHECK(0 <= axis && axis < params_shape.num_dims());
+
+  // batch_dims should be in range: [-rank(indices), rank(indices)].
+  // A negative batch_dims is offset by the rank of indices.
+  const int batch_dims =
+    _params.batch_dims < 0 ? _params.batch_dims + indices_shape.num_dims() : _params.batch_dims;
+  LUCI_INTERPRETER_CHECK(batch_dims <= axis);
+  LUCI_INTERPRETER_CHECK(0 <= batch_dims && batch_dims < params_shape.num_dims());
+  LUCI_INTERPRETER_CHECK(batch_dims <= indices_shape.num_dims());
+  // The leading batch dimensions of params and indices must agree.
+  for (int i = 0; i < batch_dims; ++i)
+  {
+    LUCI_INTERPRETER_CHECK(params_shape.dim(i) == indices_shape.dim(i));
+  }
+
+  const int output_rank = params_shape.num_dims() + indices_shape.num_dims() - 1 - batch_dims;
+  Shape output_shape(output_rank);
+
+  // Appends one extent to the output shape and advances the write position.
+  int next = 0;
+  const auto append = [&output_shape, &next](int32_t extent) { output_shape.dim(next++) = extent; };
+
+  for (int d = 0; d < axis; ++d)
+  {
+    append(params_shape.dim(d));
+  }
+  for (int d = batch_dims; d < indices_shape.num_dims(); ++d)
+  {
+    append(indices_shape.dim(d));
+  }
+  for (int d = axis + 1; d < params_shape.num_dims(); ++d)
+  {
+    append(params_shape.dim(d));
+  }
+
+  output()->resize(output_shape);
+}
+
+// Runs the kernel. Only FLOAT32 params are supported; any other element type
+// results in std::runtime_error.
+void Gather::execute() const
+{
+  if (params()->element_type() != DataType::FLOAT32)
+  {
+    throw std::runtime_error("Unsupported type.");
+  }
+
+  evalFloat();
+}
+
+// FLOAT32 gather. Forwards axis and batch_dims to the PAL implementation,
+// instantiated for S32 indices when the indices tensor is S32 and for S64
+// indices otherwise.
+void Gather::evalFloat() const
+{
+  assert(indices()->element_type() == DataType::S32 || indices()->element_type() == DataType::S64);
+
+  tflite::GatherParams tparams;
+  tparams.axis = _params.axis;
+  tparams.batch_dims = _params.batch_dims;
+
+  const auto params_shape = getTensorShape(params());
+  const auto indices_shape = getTensorShape(indices());
+  const auto output_shape = getTensorShape(output());
+
+  const auto source = getTensorData<float>(params());
+  auto destination = getTensorData<float>(output());
+
+  if (indices()->element_type() != DataType::S32)
+  {
+    luci_interpreter_pal::Gather<float, int64_t>(tparams, params_shape, source, indices_shape,
+                                                 getTensorData<int64_t>(indices()), output_shape,
+                                                 destination);
+    return;
+  }
+
+  luci_interpreter_pal::Gather<float, int32_t>(tparams, params_shape, source, indices_shape,
+                                               getTensorData<int32_t>(indices()), output_shape,
+                                               destination);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Gather.h b/compiler/luci-interpreter/src/kernels/Gather.h
new file mode 100644
index 000000000..cc02d64fb
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Gather.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_GATHER_H
+#define LUCI_INTERPRETER_KERNELS_GATHER_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Gather kernel: builds `output` from entries of `params` selected by `indices`,
+// using the axis and batch_dims stored in GatherParams.
+// Gather.cpp only handles FLOAT32 params with S32 or S64 indices.
+class Gather : public KernelWithParams<GatherParams>
+{
+public:
+ // `params` and `indices` become inputs 0 and 1; `output` becomes output 0.
+ Gather(const Tensor *params, const Tensor *indices, Tensor *output, const GatherParams &gparams);
+
+ // Accessors for the tensors registered by the constructor.
+ const Tensor *params() const { return _inputs[0]; }
+ const Tensor *indices() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ // Validates types, axis and batch_dims, and resizes the output tensor.
+ void configure() override;
+ // Dispatches on the params element type.
+ void execute() const override;
+
+private:
+ // FLOAT32 implementation called from execute().
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_GATHER_H
diff --git a/compiler/luci-interpreter/src/kernels/Gather.test.cpp b/compiler/luci-interpreter/src/kernels/Gather.test.cpp
new file mode 100644
index 000000000..4b3dda708
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Gather.test.cpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Gather.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Test fixture that creates a TestMemoryManager in SetUp() for the Gather tests.
+class GatherTest : public ::testing::Test
+{
+protected:
+ // Replaces the memory manager with a newly constructed TestMemoryManager.
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ // Passed to makeInputTensor() and used to allocate output tensors in the tests.
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// Gathers four entries along axis 1 of a {1, 6} tensor; the output shape is {1, 4}.
+TEST_F(GatherTest, Simple)
+{
+  const std::vector<float> source{1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
+  const std::vector<int32_t> picks{1, 0, 1, 5};
+  const std::vector<float> expected{2.f, 1.f, 2.f, 6.f};
+
+  Tensor params_tensor = makeInputTensor<DataType::FLOAT32>({1, 6}, source, _memory_manager.get());
+  Tensor indices_tensor = makeInputTensor<DataType::S32>({4}, picks, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  GatherParams gparams;
+  gparams.axis = 1;
+  gparams.batch_dims = 0;
+
+  Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+  kernel.configure();
+  // The output is allocated only after configure() has run.
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor), ::testing::ElementsAreArray(expected));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4}));
+}
+
+// Batched gather (axis 1, batch_dims 1): {3, 5} params with {3, 2} indices give a
+// {3, 2} output.
+TEST_F(GatherTest, Simple_Batch)
+{
+  const Shape source_shape{3, 5};
+  const Shape picks_shape{3, 2};
+  const std::vector<float> source{0., 0., 1., 0., 2., 3., 0., 0., 0., 4., 0., 5., 0., 6., 0.};
+  const std::vector<int32_t> picks{2, 4, 0, 4, 1, 3};
+  const std::vector<float> expected{1., 2., 3., 4., 5., 6.};
+
+  Tensor params_tensor =
+    makeInputTensor<DataType::FLOAT32>(source_shape, source, _memory_manager.get());
+  Tensor indices_tensor = makeInputTensor<DataType::S32>(picks_shape, picks, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  GatherParams gparams;
+  gparams.axis = 1;
+  gparams.batch_dims = 1;
+
+  Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+  kernel.configure();
+  // The output is allocated only after configure() has run.
+  _memory_manager->allocate_memory(output_tensor);
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor), ::testing::ElementsAreArray(expected));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 2}));
+}
+
+// Negative case: params is S32 while the output is FLOAT32; configure() must throw.
+TEST_F(GatherTest, Simple_NEG)
+{
+  Tensor params_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get());
+  Tensor indices_tensor = makeInputTensor<DataType::S32>({1}, {0}, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  // Value-initialize: this test sets no fields, and the struct is still passed to
+  // the kernel constructor, so it must not carry indeterminate values.
+  GatherParams gparams{};
+
+  Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative case: axis 100 is out of range for a rank-1 params tensor; configure()
+// must throw.
+TEST_F(GatherTest, Axis_NEG)
+{
+  auto *const memory_manager = _memory_manager.get();
+
+  Tensor params_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager);
+  Tensor indices_tensor = makeInputTensor<DataType::S32>({1}, {0}, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  GatherParams gparams;
+  gparams.axis = 100;
+  gparams.batch_dims = 0;
+
+  Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+// Negative case: batch_dims (1) is larger than axis (0); configure() must throw.
+// No reference output is declared because the kernel is never executed.
+TEST_F(GatherTest, Batch_NEG)
+{
+  std::vector<float> params_data{1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
+  std::vector<int32_t> indices_data{1, 0, 1, 5};
+
+  Tensor params_tensor =
+    makeInputTensor<DataType::FLOAT32>({1, 6}, params_data, _memory_manager.get());
+  Tensor indices_tensor = makeInputTensor<DataType::S32>({4}, indices_data, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+  GatherParams gparams{};
+
+  gparams.axis = 0;
+  gparams.batch_dims = 1;
+
+  Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Greater.cpp b/compiler/luci-interpreter/src/kernels/Greater.cpp
index f0dd2db36..5ccae3c38 100644
--- a/compiler/luci-interpreter/src/kernels/Greater.cpp
+++ b/compiler/luci-interpreter/src/kernels/Greater.cpp
@@ -49,6 +49,12 @@ void Greater::execute() const
case DataType::FLOAT32:
evalFloat();
break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
case DataType::U8:
evalQuantized();
break;
@@ -79,6 +85,29 @@ void Greater::evalFloat() const
}
}
+template <typename T> void Greater::evalInteger() const // Element-wise x > y on tensors read as T; results are written as bool.
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params; // only is_broadcast is assigned below
+ op_params.is_broadcast = x()->shape() != y()->shape(); // any shape difference selects the broadcasting path
+
+ if (op_params.is_broadcast) // broadcasting variant of the comparison
+ {
+ tflite::reference_ops::Broadcast4DSlowGreaterNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else // identical shapes: plain element-wise comparison
+ {
+ tflite::reference_ops::GreaterNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
void Greater::evalQuantized() const
{
const auto x_data = getTensorData<uint8_t>(x());
diff --git a/compiler/luci-interpreter/src/kernels/Greater.h b/compiler/luci-interpreter/src/kernels/Greater.h
index 877c139c9..065f76d7b 100644
--- a/compiler/luci-interpreter/src/kernels/Greater.h
+++ b/compiler/luci-interpreter/src/kernels/Greater.h
@@ -38,6 +38,7 @@ public:
private:
void evalFloat() const;
+ template <typename T> void evalInteger() const; // comparison path for integer inputs; T is the element type to read
void evalQuantized() const;
private:
diff --git a/compiler/luci-interpreter/src/kernels/Greater.test.cpp b/compiler/luci-interpreter/src/kernels/Greater.test.cpp
index ba3925f17..a48080124 100644
--- a/compiler/luci-interpreter/src/kernels/Greater.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Greater.test.cpp
@@ -97,6 +97,82 @@ TEST_F(GreaterTest, FloatBroardcast)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
}
+template <loco::DataType DType> // Same-shape Greater check on the integer type mapped from DType, including min/max limits.
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+ std::vector<bool> ref_output_data{false, true, false}; // min > min+1, 2 > -2, max > max
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor); // output storage is allocated after configure() and before execute()
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType> // Broadcast Greater check: a {4, 3} x tensor against a {3} y tensor.
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -4, -3, // Row 3
+ min_value, -2, max_value, // Row 4
+ };
+
+ std::vector<dtype> y_data{ // single row; the expected values below compare it against each row of x
+ min_value + 1, -2, max_value - 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{ // x[r][c] > y[c]
+ false, true, false, // Row 1
+ true, true, true, // Row 2
+ true, false, false, // Row 3
+ false, false, true, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor); // output storage is allocated after configure() and before execute()
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); // output takes the shape of x
+}
+
+TEST_F(GreaterTest, Int32) // Runs the same-shape and broadcast helpers for S32.
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED(); // explicit success marker; the real checks are the EXPECT_THATs inside the helpers
+}
+
+TEST_F(GreaterTest, Int64) // Runs the same-shape and broadcast helpers for S64.
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED(); // explicit success marker; the real checks are the EXPECT_THATs inside the helpers
+}
+
// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
const float F_MIN = -128.0 / 128.0;
const float F_MAX = 127.0 / 128.0;
@@ -223,6 +299,36 @@ TEST_F(GreaterTest, Input_Output_Type_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
+TEST_F(GreaterTest, Float_Broadcast_NEG) // Negative test: FLOAT32 inputs of shapes {2} and {3}.
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure()); // configure() is expected to reject the mismatched shapes; execute() is never reached
+}
+
+TEST_F(GreaterTest, Int32_Broadcast_NEG) // Negative test: S32 inputs of shapes {2} and {3}.
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure()); // configure() is expected to reject the mismatched shapes; execute() is never reached
+}
+
+TEST_F(GreaterTest, Int64_Broadcast_NEG) // Negative test: S64 inputs of shapes {2} and {3}.
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure()); // configure() is expected to reject the mismatched shapes; execute() is never reached
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/GreaterEqual.cpp b/compiler/luci-interpreter/src/kernels/GreaterEqual.cpp
index e7c1b4afe..27e42c971 100644
--- a/compiler/luci-interpreter/src/kernels/GreaterEqual.cpp
+++ b/compiler/luci-interpreter/src/kernels/GreaterEqual.cpp
@@ -52,6 +52,12 @@ void GreaterEqual::execute() const
case DataType::FLOAT32:
evalFloat();
break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
case DataType::U8:
evalQuantized();
break;
@@ -82,6 +88,29 @@ void GreaterEqual::evalFloat() const
}
}
+template <typename T> void GreaterEqual::evalInteger() const // Element-wise x >= y on tensors read as T; results are written as bool.
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params; // only is_broadcast is assigned below
+ op_params.is_broadcast = x()->shape() != y()->shape(); // any shape difference selects the broadcasting path
+
+ if (op_params.is_broadcast) // broadcasting variant of the comparison
+ {
+ tflite::reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
+ op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+ else // identical shapes: plain element-wise comparison
+ {
+ tflite::reference_ops::GreaterEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+}
+
void GreaterEqual::evalQuantized() const
{
const auto x_data = getTensorData<uint8_t>(x());
diff --git a/compiler/luci-interpreter/src/kernels/GreaterEqual.h b/compiler/luci-interpreter/src/kernels/GreaterEqual.h
index 4a0f48748..e333c30a6 100644
--- a/compiler/luci-interpreter/src/kernels/GreaterEqual.h
+++ b/compiler/luci-interpreter/src/kernels/GreaterEqual.h
@@ -38,6 +38,7 @@ public:
private:
void evalFloat() const;
+ template <typename T> void evalInteger() const; // comparison path for integer inputs; T is the element type to read
void evalQuantized() const;
private:
diff --git a/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp b/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp
index a9d172301..35bf88eab 100644
--- a/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp
@@ -96,6 +96,81 @@ TEST_F(GreaterEqualTest, FloatBroardcast)
EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
}
+template <loco::DataType DType> // Same-shape GreaterEqual check on the integer type mapped from DType, including min/max limits.
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+ std::vector<bool> ref_output_data{false, true, true}; // min >= min+1, 2 >= -2, max >= max
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor); // output storage is allocated after configure() and before execute()
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType> // Broadcast GreaterEqual check: a {4, 3} x tensor against a {3} y tensor.
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -4, -3, // Row 3
+ min_value, -2, max_value - 1, // Row 4
+ };
+
+ std::vector<dtype> y_data{ // single row; the expected values below compare it against each row of x
+ min_value + 1, -2, max_value - 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{ // x[r][c] >= y[c]; row 4 covers the two equality cases
+ false, true, false, // Row 1
+ true, true, true, // Row 2
+ true, false, false, // Row 3
+ false, true, true, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor); // output storage is allocated after configure() and before execute()
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); // output takes the shape of x
+}
+
+TEST_F(GreaterEqualTest, Int32) // Runs the same-shape and broadcast helpers for S32.
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED(); // explicit success marker; the real checks are the EXPECT_THATs inside the helpers
+}
+
+TEST_F(GreaterEqualTest, Int64) // Runs the same-shape and broadcast helpers for S64.
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED(); // explicit success marker; the real checks are the EXPECT_THATs inside the helpers
+}
// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
const float F_MIN = -128.0 / 128.0;
@@ -223,6 +298,36 @@ TEST_F(GreaterEqualTest, Input_Output_Type_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
+TEST_F(GreaterEqualTest, Float_Broadcast_NEG) // Negative test: FLOAT32 inputs of shapes {2} and {3}.
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure()); // configure() is expected to reject the mismatched shapes; execute() is never reached
+}
+
+TEST_F(GreaterEqualTest, Int32_Broadcast_NEG) // Negative test: S32 inputs of shapes {2} and {3}.
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure()); // configure() is expected to reject the mismatched shapes; execute() is never reached
+}
+
+TEST_F(GreaterEqualTest, Int64_Broadcast_NEG) // Negative test: S64 inputs of shapes {2} and {3}.
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure()); // configure() is expected to reject the mismatched shapes; execute() is never reached
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp b/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp
index 1e565e358..6f960e8b4 100644
--- a/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp
@@ -81,7 +81,7 @@ template <typename T> class L2NormalizeTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(L2NormalizeTest, DataTypes);
+TYPED_TEST_SUITE(L2NormalizeTest, DataTypes);
TYPED_TEST(L2NormalizeTest, Simple)
{
diff --git a/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp b/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp
index 289742a50..7245456cb 100644
--- a/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp
@@ -206,7 +206,8 @@ TEST_F(L2Pool2DTest, FloatPaddingSameStride)
kernel.execute();
std::vector<float> ref_output_data{3.5, 6.0, 6.5, 5.70088, 2.54951, 7.2111, 8.63134, 7.0};
- EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ // NOTE with NEON+ruy, error is #1=-1.14441e-05, #6=-1.81198e-05
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, 1.0e-4f));
// TODO make a Shape checking of output_tensor.
}
diff --git a/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp b/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp
index 6ec8a348a..0f6263b57 100644
--- a/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp
@@ -83,7 +83,7 @@ template <typename T> class LeakReluTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(LeakReluTest, DataTypes);
+TYPED_TEST_SUITE(LeakReluTest, DataTypes);
TYPED_TEST(LeakReluTest, Simple)
{
diff --git a/compiler/luci-interpreter/src/kernels/Less.cpp b/compiler/luci-interpreter/src/kernels/Less.cpp
index 041444926..8d26ff297 100644
--- a/compiler/luci-interpreter/src/kernels/Less.cpp
+++ b/compiler/luci-interpreter/src/kernels/Less.cpp
@@ -49,6 +49,12 @@ void Less::execute() const
case DataType::FLOAT32:
evalFloat();
break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
case DataType::U8:
evalQuantized();
break;
@@ -79,6 +85,29 @@ void Less::evalFloat() const
}
}
+template <typename T> void Less::evalInteger() const // Element-wise x < y on tensors read as T; results are written as bool.
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params; // only is_broadcast is assigned below
+ op_params.is_broadcast = x()->shape() != y()->shape(); // any shape difference selects the broadcasting path
+
+ if (op_params.is_broadcast) // broadcasting variant of the comparison
+ {
+ tflite::reference_ops::Broadcast4DSlowLessNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else // identical shapes: plain element-wise comparison
+ {
+ tflite::reference_ops::LessNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
void Less::evalQuantized() const
{
const auto x_data = getTensorData<uint8_t>(x());
diff --git a/compiler/luci-interpreter/src/kernels/Less.h b/compiler/luci-interpreter/src/kernels/Less.h
index 293740e72..e27bb689c 100644
--- a/compiler/luci-interpreter/src/kernels/Less.h
+++ b/compiler/luci-interpreter/src/kernels/Less.h
@@ -38,6 +38,7 @@ public:
private:
void evalFloat() const;
+ template <typename T> void evalInteger() const; // comparison path for integer inputs; T is the element type to read
void evalQuantized() const;
private:
diff --git a/compiler/luci-interpreter/src/kernels/Less.test.cpp b/compiler/luci-interpreter/src/kernels/Less.test.cpp
index e9d09b288..8c5963363 100644
--- a/compiler/luci-interpreter/src/kernels/Less.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Less.test.cpp
@@ -97,6 +97,82 @@ TEST_F(LessTest, FloatBroardcast)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
}
+template <loco::DataType DType> // Same-shape Less check on the integer type mapped from DType, including min/max limits.
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+ std::vector<bool> ref_output_data{true, false, false}; // min < min+1, 2 < -2, max < max
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor); // output storage is allocated after configure() and before execute()
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType> // Broadcast Less check: a {4, 3} x tensor against a {3} y tensor.
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -4, -3, // Row 3
+ min_value, -2, max_value, // Row 4
+ };
+
+ std::vector<dtype> y_data{ // single row; the expected values below compare it against each row of x
+ min_value + 1, -2, max_value - 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{ // x[r][c] < y[c]
+ true, false, true, // Row 1
+ false, false, false, // Row 2
+ false, true, true, // Row 3
+ true, false, false, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor); // output storage is allocated after configure() and before execute()
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); // output takes the shape of x
+}
+
+TEST_F(LessTest, Int32) // Runs the same-shape and broadcast helpers for S32.
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED(); // explicit success marker; the real checks are the EXPECT_THATs inside the helpers
+}
+
+TEST_F(LessTest, Int64) // Runs the same-shape and broadcast helpers for S64.
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED(); // explicit success marker; the real checks are the EXPECT_THATs inside the helpers
+}
+
// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
const float F_MIN = -128.0 / 128.0;
const float F_MAX = 127.0 / 128.0;
@@ -223,6 +299,36 @@ TEST_F(LessTest, Input_Output_Type_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
+TEST_F(LessTest, Float_Broadcast_NEG) // Negative test: FLOAT32 inputs of shapes {2} and {3}.
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure()); // configure() is expected to reject the mismatched shapes; execute() is never reached
+}
+
+TEST_F(LessTest, Int32_Broadcast_NEG) // Negative test: S32 inputs of shapes {2} and {3}.
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure()); // configure() is expected to reject the mismatched shapes; execute() is never reached
+}
+
+TEST_F(LessTest, Int64_Broadcast_NEG) // Negative test: S64 inputs of shapes {2} and {3}.
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure()); // configure() is expected to reject the mismatched shapes; execute() is never reached
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/LessEqual.cpp b/compiler/luci-interpreter/src/kernels/LessEqual.cpp
index 5f4c7f7aa..b474bc47a 100644
--- a/compiler/luci-interpreter/src/kernels/LessEqual.cpp
+++ b/compiler/luci-interpreter/src/kernels/LessEqual.cpp
@@ -49,6 +49,12 @@ void LessEqual::execute() const
case DataType::FLOAT32:
evalFloat();
break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
case DataType::U8:
evalQuantized();
break;
@@ -79,6 +85,29 @@ void LessEqual::evalFloat() const
}
}
+template <typename T> void LessEqual::evalInteger() const // Element-wise x <= y on tensors read as T; results are written as bool.
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params; // only is_broadcast is assigned below
+ op_params.is_broadcast = x()->shape() != y()->shape(); // any shape difference selects the broadcasting path
+
+ if (op_params.is_broadcast) // broadcasting variant of the comparison
+ {
+ tflite::reference_ops::Broadcast4DSlowLessEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else // identical shapes: plain element-wise comparison
+ {
+ tflite::reference_ops::LessEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
void LessEqual::evalQuantized() const
{
const auto x_data = getTensorData<uint8_t>(x());
diff --git a/compiler/luci-interpreter/src/kernels/LessEqual.h b/compiler/luci-interpreter/src/kernels/LessEqual.h
index b6da1a2a8..f82ea90d4 100644
--- a/compiler/luci-interpreter/src/kernels/LessEqual.h
+++ b/compiler/luci-interpreter/src/kernels/LessEqual.h
@@ -38,6 +38,7 @@ public:
private:
void evalFloat() const;
+ template <typename T> void evalInteger() const; // comparison path for integer inputs; T is the element type to read
void evalQuantized() const;
private:
diff --git a/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp b/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp
index 0558003dd..b2e2fa7a1 100644
--- a/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp
@@ -97,6 +97,82 @@ TEST_F(LessEqualTest, FloatBroardcast)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
}
+template <loco::DataType DType> // Same-shape LessEqual check on the integer type mapped from DType, including min/max limits.
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+ std::vector<bool> ref_output_data{true, false, true}; // min <= min+1, 2 <= -2, max <= max
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor); // output storage is allocated after configure() and before execute()
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType> // Broadcast LessEqual check: a {4, 3} x tensor against a {3} y tensor.
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -4, -3, // Row 3
+ min_value, -2, max_value, // Row 4
+ };
+
+ std::vector<dtype> y_data{ // single row; the expected values below compare it against each row of x
+ min_value + 1, -2, max_value - 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{ // x[r][c] <= y[c]; row 4 column 2 is the equality case (-2 <= -2)
+ true, false, true, // Row 1
+ false, false, false, // Row 2
+ false, true, true, // Row 3
+ true, true, false, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor); // output storage is allocated after configure() and before execute()
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); // output takes the shape of x
+}
+
+TEST_F(LessEqualTest, Int32) // Runs the same-shape and broadcast helpers for S32.
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED(); // explicit success marker; the real checks are the EXPECT_THATs inside the helpers
+}
+
+TEST_F(LessEqualTest, Int64) // Runs the same-shape and broadcast helpers for S64.
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED(); // explicit success marker; the real checks are the EXPECT_THATs inside the helpers
+}
+
// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
const float F_MIN = -128.0 / 128.0;
const float F_MAX = 127.0 / 128.0;
@@ -223,6 +299,36 @@ TEST_F(LessEqualTest, Input_Output_Type_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
+TEST_F(LessEqualTest, Float_Broadcast_NEG) // Negative test: FLOAT32 inputs of shapes {2} and {3}.
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure()); // configure() is expected to reject the mismatched shapes; execute() is never reached
+}
+
+TEST_F(LessEqualTest, Int32_Broadcast_NEG) // Negative test: S32 inputs of shapes {2} and {3}.
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure()); // configure() is expected to reject the mismatched shapes; execute() is never reached
+}
+
+TEST_F(LessEqualTest, Int64_Broadcast_NEG) // Negative test: S64 inputs of shapes {2} and {3}.
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure()); // configure() is expected to reject the mismatched shapes; execute() is never reached
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Logistic.test.cpp b/compiler/luci-interpreter/src/kernels/Logistic.test.cpp
index 70227563f..5a1ea669c 100644
--- a/compiler/luci-interpreter/src/kernels/Logistic.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Logistic.test.cpp
@@ -76,7 +76,7 @@ template <typename T> class LogisticTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(LogisticTest, DataTypes);
+TYPED_TEST_SUITE(LogisticTest, DataTypes);
TYPED_TEST(LogisticTest, Simple)
{
diff --git a/compiler/luci-interpreter/src/kernels/MirrorPad.cpp b/compiler/luci-interpreter/src/kernels/MirrorPad.cpp
index 89049c96c..2fbeefce4 100644
--- a/compiler/luci-interpreter/src/kernels/MirrorPad.cpp
+++ b/compiler/luci-interpreter/src/kernels/MirrorPad.cpp
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -18,8 +19,6 @@
#include "kernels/Utils.h"
-#include <tensorflow/lite/kernels/internal/reference/pad.h>
-
namespace luci_interpreter
{
namespace kernels
@@ -59,44 +58,25 @@ void MirrorPad::configure()
output()->resize(output_shape);
}
+template <typename T> // Forward declaration: execute() calls this before its definition appears later in the file.
+inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode,
+ Tensor &output);
+
void MirrorPad::execute() const
{
- const int num_dims = input()->shape().num_dims();
-
- tflite::PadParams params{};
- params.left_padding_count = num_dims;
- params.right_padding_count = num_dims;
-
- const auto *paddings_data = getTensorData<int32_t>(paddings());
- for (int i = num_dims - 1; i >= 0; --i)
- {
- params.left_padding[i] = paddings_data[i * 2];
- params.right_padding[i] = paddings_data[i * 2 + 1];
- }
-
switch (input()->element_type())
{
case DataType::FLOAT32:
{
- const float pad_value = 0;
-
- // NOTE: this implementation only obtains min-max values for quantization
- // TODO: calculate proper inference values
- tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<float>(input()),
- &pad_value, getTensorShape(output()),
- getTensorData<float>(output()));
+ MirrorPadImpl<float>(*input(), *paddings(), params().mode, *output());
break;
}
case DataType::U8:
{
- // NOTE: this implementation only obtains min-max values for quantization
- // TODO: calculate proper inference values
assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
- const auto pad_value = static_cast<uint8_t>(output()->zero_point());
- tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
- &pad_value, getTensorShape(output()),
- getTensorData<uint8_t>(output()));
+
+ MirrorPadImpl<uint8_t>(*input(), *paddings(), params().mode, *output());
break;
}
default:
@@ -104,5 +84,87 @@ void MirrorPad::execute() const
}
}
+/**
+ * Fills `output` with the contents of `input` extended by mirror padding.
+ *
+ * Both tensors are viewed as having up to four axes, right-aligned as (b, h, w, d); axes the
+ * input does not have are given extent 1 and no padding. `paddings` is read as consecutive
+ * [before, after] pairs, one pair per input axis. The shape of `output` is read, not set, so it
+ * must already be resized to the padded shape.
+ *
+ * NOTE(review): in REFLECT mode the source coordinate is computed by wrapping modulo the input
+ * extent, which looks periodic rather than mirrored once an extent exceeds 2 - confirm that this
+ * is the intended behaviour.
+ */
+template <typename T>
+inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode,
+                          Tensor &output)
+{
+  auto const input_dims = input.shape().num_dims();
+  auto const input_data = input.data<T>();
+  auto const paddings_data = paddings.data<int32_t>();
+  auto const output_data = output.data<T>();
+
+  auto const input_b = input_dims > 3 ? input.shape().dim(input_dims - 4) : 1;
+  auto const input_h = input_dims > 2 ? input.shape().dim(input_dims - 3) : 1;
+  auto const input_w = input_dims > 1 ? input.shape().dim(input_dims - 2) : 1;
+  auto const input_d = input.shape().dim(input_dims - 1);
+
+  auto const input_h_offset = input_d * input_w;
+  auto const input_b_offset = input_h_offset * input_h;
+
+  auto const output_b = input_dims > 3 ? output.shape().dim(input_dims - 4) : 1;
+  auto const output_h = input_dims > 2 ? output.shape().dim(input_dims - 3) : 1;
+  auto const output_w = input_dims > 1 ? output.shape().dim(input_dims - 2) : 1;
+  auto const output_d = output.shape().dim(input_dims - 1);
+
+  // Axes missing from a lower-rank input have no entry in `paddings`; indexing the buffer with
+  // a negative offset would read out of bounds, so those axes get zero padding instead.
+  auto const left_b_pad = input_dims > 3 ? paddings_data[2 * (input_dims - 4)] : 0;
+  auto const left_h_pad = input_dims > 2 ? paddings_data[2 * (input_dims - 3)] : 0;
+  auto const left_w_pad = input_dims > 1 ? paddings_data[2 * (input_dims - 2)] : 0;
+  auto const left_d_pad = paddings_data[2 * (input_dims - 1)];
+
+  auto const right_b_pad = input_dims > 3 ? paddings_data[2 * (input_dims - 4) + 1] : 0;
+  auto const right_h_pad = input_dims > 2 ? paddings_data[2 * (input_dims - 3) + 1] : 0;
+  auto const right_w_pad = input_dims > 1 ? paddings_data[2 * (input_dims - 2) + 1] : 0;
+  auto const right_d_pad = paddings_data[2 * (input_dims - 1) + 1];
+
+  // Remainder that is always in [0, b), also for negative `a`.
+  const auto positive_mod = [](auto a, auto b) { return (a % b + b) % b; };
+  // Flat index into the input for the coordinate (b, h, w, d).
+  const auto offset_index = [input_d, input_h_offset, input_b_offset](auto d, auto w, auto h,
+                                                                      auto b) {
+    return d + w * input_d + h * input_h_offset + b * input_b_offset;
+  };
+
+  // Maps output coordinate `i` along one axis to its source coordinate in SYMMETRIC mode:
+  // every other tile of length `input` is traversed backwards.
+  const auto symmetric_dim = [&positive_mod](auto i, auto left_pad, auto input) {
+    bool reflected = (((i < left_pad ? i + 1 - input : i) - left_pad) / input & 1) == 1;
+    return positive_mod(reflected ? input + left_pad - i - 1 : i - left_pad, input);
+  };
+
+  const T *in_ptr = input_data;
+  T *out_ptr = output_data;
+
+  for (int32_t b = 0; b < output_b; ++b)
+  {
+    for (int32_t h = 0; h < output_h; ++h)
+    {
+      for (int32_t w = 0; w < output_w; ++w)
+      {
+        for (int32_t d = 0; d < output_d; ++d)
+        {
+          if (b < left_b_pad || b >= output_b - right_b_pad || //
+              h < left_h_pad || h >= output_h - right_h_pad || //
+              w < left_w_pad || w >= output_w - right_w_pad || //
+              d < left_d_pad || d >= output_d - right_d_pad)
+          {
+            // Padded position: look the value up through the per-axis coordinate mapping.
+            if (mode == MirrorPadMode::REFLECT)
+            {
+              *out_ptr++ = input_data[offset_index(
+                positive_mod(d - left_d_pad, input_d), positive_mod(w - left_w_pad, input_w),
+                positive_mod(h - left_h_pad, input_h), positive_mod(b - left_b_pad, input_b))];
+            }
+            else
+            {
+              *out_ptr++ = input_data[offset_index(
+                symmetric_dim(d, left_d_pad, input_d), symmetric_dim(w, left_w_pad, input_w),
+                symmetric_dim(h, left_h_pad, input_h), symmetric_dim(b, left_b_pad, input_b))];
+            }
+          }
+          else
+          {
+            // Interior position: the interior is visited in the same order as the input is
+            // laid out, so the next unread input element is the right one.
+            *out_ptr++ = *in_ptr++;
+          }
+        }
+      }
+    }
+  }
+}
+
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp b/compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp
index de9da5051..740d8cb22 100644
--- a/compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/MirrorPad.test.cpp
@@ -14,4 +14,212 @@
* limitations under the License.
*/
-// TODO: Add tests for MirrorPad
+#include "kernels/MirrorPad.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Fixture that owns a test memory manager and runs a MirrorPad kernel end to end.
+class MirrorPadTest : public ::testing::Test
+{
+protected:
+  void SetUp() override
+  {
+    _memory_manager = std::make_unique<TestMemoryManager>();
+  }
+
+  // Builds the kernel for `mode`, configures it, allocates `output` and executes it.
+  void Execute(const Tensor &input, const Tensor &padding, Tensor &output, MirrorPadMode mode)
+  {
+    MirrorPadParams kernel_params{};
+    kernel_params.mode = mode;
+
+    MirrorPad mirror_pad(&input, &padding, &output, kernel_params);
+    mirror_pad.configure();
+    _memory_manager->allocate_memory(output);
+    mirror_pad.execute();
+  }
+
+  std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// REFLECT mode on a 1x2x2x1 float input, padding rows by (2, 1) and columns by (1, 2).
+TEST_F(MirrorPadTest, FloatReflect)
+{
+  const Shape in_shape = {1, 2, 2, 1};
+  const Shape pad_shape = {4, 2};
+
+  const std::vector<float> in_values{1.0f, 2.0f, //
+                                     3.0f, 4.0f};
+  const std::vector<int> pad_values{0, 0, 2, 1, 1, 2, 0, 0};
+
+  const std::vector<float> expected{2.0f, 1.0f, 2.0f, 1.0f, 2.0f, //
+                                    4.0f, 3.0f, 4.0f, 3.0f, 4.0f, //
+                                    2.0f, 1.0f, 2.0f, 1.0f, 2.0f, //
+                                    4.0f, 3.0f, 4.0f, 3.0f, 4.0f, //
+                                    2.0f, 1.0f, 2.0f, 1.0f, 2.0f};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(in_shape, in_values, _memory_manager.get());
+  Tensor padding_tensor =
+    makeInputTensor<DataType::S32>(pad_shape, pad_values, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT);
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(expected));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 5, 5, 1}));
+}
+
+// SYMMETRIC mode on a 1x2x2x1 float input, padding rows by (2, 1) and columns by (1, 2).
+TEST_F(MirrorPadTest, FloatSymmetric)
+{
+  const Shape in_shape = {1, 2, 2, 1};
+  const Shape pad_shape = {4, 2};
+
+  const std::vector<float> in_values{1.0f, 2.0f, //
+                                     3.0f, 4.0f};
+  const std::vector<int> pad_values{0, 0, 2, 1, 1, 2, 0, 0};
+
+  const std::vector<float> expected{3.0f, 3.0f, 4.0f, 4.0f, 3.0f, //
+                                    1.0f, 1.0f, 2.0f, 2.0f, 1.0f, //
+                                    1.0f, 1.0f, 2.0f, 2.0f, 1.0f, //
+                                    3.0f, 3.0f, 4.0f, 4.0f, 3.0f, //
+                                    3.0f, 3.0f, 4.0f, 4.0f, 3.0f};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(in_shape, in_values, _memory_manager.get());
+  Tensor padding_tensor =
+    makeInputTensor<DataType::S32>(pad_shape, pad_values, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC);
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(expected));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 5, 5, 1}));
+}
+
+// SYMMETRIC mode on a rank-2 (3x1) float input, padding only the first axis by (1, 2).
+TEST_F(MirrorPadTest, FloatSymmetric2Dim)
+{
+  const Shape in_shape = {3, 1};
+  const Shape pad_shape = {2, 2};
+
+  const std::vector<float> in_values{1.0f, 2.0f, 3.0f};
+  const std::vector<int> pad_values{1, 2, 0, 0};
+  const std::vector<float> expected{1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(in_shape, in_values, _memory_manager.get());
+  Tensor padding_tensor =
+    makeInputTensor<DataType::S32>(pad_shape, pad_values, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC);
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(expected));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({6, 1}));
+}
+
+// REFLECT mode on a quantized 1x2x3x1 input, padding rows by (2, 1) and columns by (1, 3).
+// NOTE(review): the expected rows repeat the input periodically (3 | 1 2 3 | 1 2 3) instead of
+// mirroring it - confirm this matches the intended REFLECT semantics.
+TEST_F(MirrorPadTest, Uint8Reflect)
+{
+  const Shape in_shape = {1, 2, 3, 1};
+  const Shape pad_shape = {4, 2};
+
+  const float tolerance = getTolerance(0.0f, 6.0f, 255);
+  const std::pair<float, int32_t> quant = quantizationParams<uint8_t>(0.0f, 6.0f);
+  const float scale = quant.first;
+  const int32_t zero_point = quant.second;
+
+  const std::vector<float> in_values{1.0f, 2.0f, 3.0f, //
+                                     4.0f, 5.0f, 6.0f};
+  const std::vector<int> pad_values{0, 0, 2, 1, 1, 3, 0, 0};
+
+  const std::vector<float> expected{
+    3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, //
+    6.0f, 4.0f, 5.0f, 6.0f, 4.0f, 5.0f, 6.0f, //
+    3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, //
+    6.0f, 4.0f, 5.0f, 6.0f, 4.0f, 5.0f, 6.0f, //
+    3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, //
+  };
+
+  Tensor input_tensor = makeInputTensor<DataType::U8>(in_shape, scale, zero_point, in_values,
+                                                      _memory_manager.get());
+  Tensor padding_tensor =
+    makeInputTensor<DataType::S32>(pad_shape, pad_values, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8, scale, zero_point);
+
+  Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT);
+
+  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(expected, tolerance));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 5, 7, 1}));
+}
+
+// SYMMETRIC mode on a quantized 1x2x3x1 input, padding rows by (2, 1) and columns by (1, 3).
+TEST_F(MirrorPadTest, Uint8Symmetric)
+{
+  const Shape in_shape = {1, 2, 3, 1};
+  const Shape pad_shape = {4, 2};
+
+  const float tolerance = getTolerance(0.0f, 6.0f, 255);
+  const std::pair<float, int32_t> quant = quantizationParams<uint8_t>(0.0f, 6.0f);
+  const float scale = quant.first;
+  const int32_t zero_point = quant.second;
+
+  const std::vector<float> in_values{1.0f, 2.0f, 3.0f, //
+                                     4.0f, 5.0f, 6.0f};
+  const std::vector<int> pad_values{0, 0, 2, 1, 1, 3, 0, 0};
+
+  const std::vector<float> expected{
+    4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, //
+    1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f, //
+    1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f, //
+    4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, //
+    4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, //
+  };
+
+  Tensor input_tensor = makeInputTensor<DataType::U8>(in_shape, scale, zero_point, in_values,
+                                                      _memory_manager.get());
+  Tensor padding_tensor =
+    makeInputTensor<DataType::S32>(pad_shape, pad_values, _memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8, scale, zero_point);
+
+  Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC);
+
+  EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(expected, tolerance));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 5, 7, 1}));
+}
+
+// A rank-5 input is expected to be rejected somewhere between configure and execute.
+TEST_F(MirrorPadTest, UnsupportedDim_NEG)
+{
+  auto *const mm = _memory_manager.get();
+
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 1, 1, 1}, {1.0f}, mm);
+  Tensor padding_tensor =
+    makeInputTensor<DataType::S32>({5, 2}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, mm);
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  EXPECT_ANY_THROW(Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT));
+}
+
+// An S64 input is expected to be rejected somewhere between configure and execute.
+TEST_F(MirrorPadTest, InvalidInputType_NEG)
+{
+  auto *const mm = _memory_manager.get();
+
+  Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, mm);
+  Tensor padding_tensor = makeInputTensor<DataType::S32>({1, 2}, {0, 0}, mm);
+  Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+  EXPECT_ANY_THROW(Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Mul.cpp b/compiler/luci-interpreter/src/kernels/Mul.cpp
index bc855de0f..531fb4fa1 100644
--- a/compiler/luci-interpreter/src/kernels/Mul.cpp
+++ b/compiler/luci-interpreter/src/kernels/Mul.cpp
@@ -42,6 +42,8 @@ void Mul::configure()
LUCI_INTERPRETER_CHECK(output()->element_type() == input1()->element_type());
if (input1()->element_type() == DataType::S16)
{
+ LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 &&
+ input2()->zero_points().size() == 1)
LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
output()->zero_point() == 0);
}
@@ -56,6 +58,12 @@ void Mul::execute() const
case DataType::FLOAT32:
evalFloat();
break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
case DataType::S16:
evalQuantizedS16();
break;
@@ -66,13 +74,8 @@ void Mul::execute() const
void Mul::evalFloat() const
{
- float activation_min{};
- float activation_max{};
- calculateActivationRange(_params.activation, &activation_min, &activation_max);
-
tflite::ArithmeticParams params{};
- params.float_activation_min = activation_min;
- params.float_activation_max = activation_max;
+ fillArithmeticActivationRange<float>(params, _params.activation);
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
getTensorShape(input1()), getTensorShape(input2()), &params);
@@ -91,6 +94,28 @@ void Mul::evalFloat() const
}
}
+// Element-wise multiplication for integer element type T, with the fused activation applied
+// through the arithmetic params. Uses the broadcasting kernel when the input shapes require it.
+template <typename T> void Mul::evalInteger() const
+{
+  tflite::ArithmeticParams arith_params{};
+  fillArithmeticActivationRange<T>(arith_params, _params.activation);
+
+  const auto lhs_shape = getTensorShape(input1());
+  const auto rhs_shape = getTensorShape(input2());
+  const auto out_shape = getTensorShape(output());
+
+  const T *lhs_data = getTensorData<T>(input1());
+  const T *rhs_data = getTensorData<T>(input2());
+  T *out_data = getTensorData<T>(output());
+
+  // ProcessBroadcastShapes also records the broadcast layout in arith_params.
+  const bool need_broadcast =
+    tflite::reference_ops::ProcessBroadcastShapes(lhs_shape, rhs_shape, &arith_params);
+
+  if (!need_broadcast)
+  {
+    luci_interpreter_pal::Mul(arith_params, lhs_shape, lhs_data, rhs_shape, rhs_data, out_shape,
+                              out_data);
+    return;
+  }
+
+  luci_interpreter_pal::BroadcastMul4DSlow(arith_params, lhs_shape, lhs_data, rhs_shape, rhs_data,
+                                           out_shape, out_data);
+}
+
void Mul::evalQuantizedS16() const
{
const auto input1_scale = static_cast<double>(input1()->scale());
diff --git a/compiler/luci-interpreter/src/kernels/Mul.h b/compiler/luci-interpreter/src/kernels/Mul.h
index 2ccf60f3a..c0cf817df 100644
--- a/compiler/luci-interpreter/src/kernels/Mul.h
+++ b/compiler/luci-interpreter/src/kernels/Mul.h
@@ -42,6 +42,7 @@ public:
private:
void evalFloat() const;
+ template <typename T> void evalInteger() const;
void evalQuantizedS16() const;
};
diff --git a/compiler/luci-interpreter/src/kernels/Mul.test.cpp b/compiler/luci-interpreter/src/kernels/Mul.test.cpp
index 471f6ac86..fc0e60614 100644
--- a/compiler/luci-interpreter/src/kernels/Mul.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Mul.test.cpp
@@ -93,6 +93,78 @@ TEST_F(MulTest, Float)
}
}
+// Runs Mul with RELU on integer tensors of type DType for several broadcast shapes, first as
+// (base, shape[i]) and then with the two inputs exchanged, and compares with fixed outputs.
+template <loco::DataType DType> void checkInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  const Shape base_shape = {2, 3, 1, 2};
+  const std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+
+  const dtype max_value = std::numeric_limits<dtype>::max();
+  const dtype res_max = max_value - max_value % 10;
+
+  const std::vector<std::vector<dtype>> test_outputs = {
+    {8,  0, 20,  0, 4,  30,  //
+     16, 0, 40,  3, 8,  0,   //
+     0,  0, 0,   6, 0,  0,   //
+     4,  0, 10,  9, 2,  0,   //
+     40, 0, 100, 0, 20, 150, //
+     28, 0, 70,  0, 14, res_max},
+    {8, 0, 40, 3, 0, 0, 4, 0, 100, 0, 14, res_max},
+    {8, 12, 0, 0, 20, 30, 16, 0, 0, 0,  40, 0,   0,   0, 0, 0,  0,
+     0, 0,  9, 2, 0,  10, 0,  0, 0, 20, 30, 100, 150, 0, 0, 14, max_value / 10 * 2,
+     70, res_max},
+    {8, 12, 0, 0, 0, 0, 0, 9, 20, 30, 70, res_max}};
+  const std::vector<dtype> input1_data{2, 3, 4, -1, -3, -2, 1, -3, 10, 15, 7, max_value / 10};
+  const std::vector<dtype> input2_data{4, 0, 10, -3, 2, 10};
+
+  // Builds both operands, runs the kernel once and checks the result of case `case_no`.
+  const auto run_case = [&](const Shape &lhs_shape, const std::vector<dtype> &lhs_data,
+                            const Shape &rhs_shape, const std::vector<dtype> &rhs_data,
+                            size_t case_no) {
+    Tensor lhs_tensor = makeInputTensor<DType>(lhs_shape, lhs_data, memory_manager);
+    Tensor rhs_tensor = makeInputTensor<DType>(rhs_shape, rhs_data, memory_manager);
+    Tensor output_tensor = makeOutputTensor(DType);
+
+    MulParams params{};
+    params.activation = Activation::RELU;
+
+    Mul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, params);
+    kernel.configure();
+    memory_manager->allocate_memory(output_tensor);
+    kernel.execute();
+
+    EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[case_no])
+      << "With shape number " << case_no;
+  };
+
+  for (size_t i = 0; i < test_shapes.size(); ++i)
+  {
+    run_case(base_shape, input1_data, test_shapes[i], input2_data, i);
+  }
+  // Re-run with exchanged inputs.
+  for (size_t i = 0; i < test_shapes.size(); ++i)
+  {
+    run_case(test_shapes[i], input2_data, base_shape, input1_data, i);
+  }
+}
+
+// Integer multiplication check for 64-bit signed tensors.
+TEST_F(MulTest, SInt64)
+{
+  auto *const mm = _memory_manager.get();
+  checkInteger<loco::DataType::S64>(mm);
+  SUCCEED();
+}
+
+// Integer multiplication check for 32-bit signed tensors.
+TEST_F(MulTest, SInt32)
+{
+  auto *const mm = _memory_manager.get();
+  checkInteger<loco::DataType::S32>(mm);
+  SUCCEED();
+}
+
TEST_F(MulTest, SInt16)
{
Shape base_shape = {2, 3, 1, 2};
@@ -161,6 +233,60 @@ TEST_F(MulTest, SInt16)
}
}
+// Operands of different element types (FLOAT32 and S32) must be rejected by configure().
+TEST_F(MulTest, Input_Output_Type_NEG)
+{
+  auto *const mm = _memory_manager.get();
+
+  Tensor lhs_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, mm);
+  Tensor rhs_tensor = makeInputTensor<DataType::S32>({1}, {2}, mm);
+  Tensor result_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  MulParams mul_params{};
+  mul_params.activation = Activation::RELU;
+
+  Mul mul(&lhs_tensor, &rhs_tensor, &result_tensor, mul_params);
+  EXPECT_ANY_THROW(mul.configure());
+}
+
+// An output element type (S32) that differs from the inputs (S64) must be rejected by
+// configure().
+TEST_F(MulTest, Invalid_Output_Type_NEG)
+{
+  auto *const mm = _memory_manager.get();
+
+  Tensor lhs_tensor = makeInputTensor<DataType::S64>({1}, {1}, mm);
+  Tensor rhs_tensor = makeInputTensor<DataType::S64>({1}, {2}, mm);
+  Tensor result_tensor = makeOutputTensor(DataType::S32);
+
+  MulParams mul_params{};
+  mul_params.activation = Activation::RELU;
+
+  Mul mul(&lhs_tensor, &rhs_tensor, &result_tensor, mul_params);
+  EXPECT_ANY_THROW(mul.configure());
+}
+
+// U64 passes configure() in this test but is expected to make execute() throw.
+TEST_F(MulTest, Invalid_Input_Type_NEG)
+{
+  auto *const mm = _memory_manager.get();
+
+  Tensor lhs_tensor = makeInputTensor<DataType::U64>({1}, {1}, mm);
+  Tensor rhs_tensor = makeInputTensor<DataType::U64>({1}, {2}, mm);
+  Tensor result_tensor = makeOutputTensor(DataType::U64);
+
+  MulParams mul_params{};
+  mul_params.activation = Activation::RELU;
+
+  Mul mul(&lhs_tensor, &rhs_tensor, &result_tensor, mul_params);
+  mul.configure();
+  mm->allocate_memory(result_tensor);
+  EXPECT_ANY_THROW(mul.execute());
+}
+
+// S16 tensors created without quantization parameters must be rejected by configure().
+TEST_F(MulTest, Invalid_Quantization_NEG)
+{
+  auto *const mm = _memory_manager.get();
+
+  Tensor lhs_tensor = makeInputTensor<DataType::S16>({1}, {1}, mm);
+  Tensor rhs_tensor = makeInputTensor<DataType::S16>({1}, {2}, mm);
+  Tensor result_tensor = makeOutputTensor(DataType::S16);
+
+  MulParams mul_params{};
+  mul_params.activation = Activation::NONE;
+
+  Mul mul(&lhs_tensor, &rhs_tensor, &result_tensor, mul_params);
+  EXPECT_ANY_THROW(mul.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/NotEqual.cpp b/compiler/luci-interpreter/src/kernels/NotEqual.cpp
index 99d5e0fa0..54e5eee34 100644
--- a/compiler/luci-interpreter/src/kernels/NotEqual.cpp
+++ b/compiler/luci-interpreter/src/kernels/NotEqual.cpp
@@ -49,6 +49,12 @@ void NotEqual::execute() const
case DataType::FLOAT32:
evalFloat();
break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
case DataType::U8:
evalQuantized();
break;
@@ -79,6 +85,29 @@ void NotEqual::evalFloat() const
}
}
+// Element-wise `x != y` for integer element type T, written to the boolean output. The
+// broadcasting kernel is used whenever the two input shapes differ.
+template <typename T> void NotEqual::evalInteger() const
+{
+  const auto x_data = getTensorData<T>(x());
+  const auto y_data = getTensorData<T>(y());
+  auto output_data = getTensorData<bool>(output());
+
+  // Value-initialise so the fields that are not set here are not left indeterminate.
+  tflite::ComparisonParams op_params{};
+  op_params.is_broadcast = x()->shape() != y()->shape();
+
+  if (op_params.is_broadcast)
+  {
+    tflite::reference_ops::Broadcast4DSlowNotEqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                                            getTensorShape(y()), y_data,
+                                                            getTensorShape(output()), output_data);
+  }
+  else
+  {
+    tflite::reference_ops::NotEqualNoScaling(op_params, getTensorShape(x()), x_data,
+                                             getTensorShape(y()), y_data, getTensorShape(output()),
+                                             output_data);
+  }
+}
+
void NotEqual::evalQuantized() const
{
const auto x_data = getTensorData<uint8_t>(x());
diff --git a/compiler/luci-interpreter/src/kernels/NotEqual.h b/compiler/luci-interpreter/src/kernels/NotEqual.h
index 247874df7..d2aafe893 100644
--- a/compiler/luci-interpreter/src/kernels/NotEqual.h
+++ b/compiler/luci-interpreter/src/kernels/NotEqual.h
@@ -38,6 +38,7 @@ public:
private:
void evalFloat() const;
+ template <typename T> void evalInteger() const;
void evalQuantized() const;
private:
diff --git a/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp b/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp
index 763f86893..45bf4022a 100644
--- a/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp
@@ -99,6 +99,82 @@ TEST_F(NotEqualTest, FloatBroardcast)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
}
+// Compares two same-shaped integer vectors, including the extreme values of the type.
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  const dtype lowest = std::numeric_limits<dtype>::min();
+  const dtype highest = std::numeric_limits<dtype>::max();
+
+  const std::vector<dtype> lhs_values{lowest, 2, highest};
+  const std::vector<dtype> rhs_values{lowest, -2, highest};
+  const std::vector<bool> expected{false, true, false};
+
+  Tensor x_tensor = makeInputTensor<DType>({3}, lhs_values, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, rhs_values, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual not_equal(&x_tensor, &y_tensor, &output_tensor);
+  not_equal.configure();
+  memory_manager->allocate_memory(output_tensor);
+  not_equal.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(expected));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+// Compares a 4x3 integer tensor with a 3-element row that is broadcast over every row.
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  const dtype lowest = std::numeric_limits<dtype>::min();
+  const dtype highest = std::numeric_limits<dtype>::max();
+
+  const std::vector<dtype> lhs_values{
+    lowest, 2,  3,       // Row 1
+    4,      5,  highest, // Row 2
+    -1,     -2, -3,      // Row 3
+    lowest, -2, highest, // Row 4
+  };
+
+  const std::vector<dtype> rhs_values{
+    lowest, -2, highest, // Row 1
+  };
+
+  const std::vector<bool> expected{
+    false, true,  true,  // Row 1
+    true,  true,  false, // Row 2
+    true,  false, true,  // Row 3
+    false, false, false, // Row 4
+  };
+
+  Tensor x_tensor = makeInputTensor<DType>({4, 3}, lhs_values, memory_manager);
+  Tensor y_tensor = makeInputTensor<DType>({3}, rhs_values, memory_manager);
+  Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual not_equal(&x_tensor, &y_tensor, &output_tensor);
+  not_equal.configure();
+  memory_manager->allocate_memory(output_tensor);
+  not_equal.execute();
+
+  EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(expected));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+// Same-shape and broadcast comparisons for 32-bit signed tensors.
+TEST_F(NotEqualTest, Int32)
+{
+  auto *const mm = _memory_manager.get();
+  checkIntegerSimple<loco::DataType::S32>(mm);
+  checkIntegerBroadcast<loco::DataType::S32>(mm);
+  SUCCEED();
+}
+
+// Same-shape and broadcast comparisons for 64-bit signed tensors.
+TEST_F(NotEqualTest, Int64)
+{
+  auto *const mm = _memory_manager.get();
+  checkIntegerSimple<loco::DataType::S64>(mm);
+  checkIntegerBroadcast<loco::DataType::S64>(mm);
+  SUCCEED();
+}
+
// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
const float F_MIN = -128.0 / 128.0;
const float F_MAX = 127.0 / 128.0;
@@ -195,6 +271,36 @@ TEST_F(NotEqualTest, Input_Output_Type_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
+// Float inputs of incompatible shapes ({2} vs {3}) must make configure() throw.
+TEST_F(NotEqualTest, Float_Broadcast_NEG)
+{
+  auto *const mm = _memory_manager.get();
+
+  Tensor lhs_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, mm);
+  Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, mm);
+  Tensor result_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual not_equal(&lhs_tensor, &rhs_tensor, &result_tensor);
+  ASSERT_ANY_THROW(not_equal.configure());
+}
+
+// S32 inputs of incompatible shapes ({2} vs {3}) must make configure() throw.
+TEST_F(NotEqualTest, Int32_Broadcast_NEG)
+{
+  auto *const mm = _memory_manager.get();
+
+  Tensor lhs_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, mm);
+  Tensor rhs_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, mm);
+  Tensor result_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual not_equal(&lhs_tensor, &rhs_tensor, &result_tensor);
+  ASSERT_ANY_THROW(not_equal.configure());
+}
+
+// S64 inputs of incompatible shapes ({2} vs {3}) must make configure() throw.
+TEST_F(NotEqualTest, Int64_Broadcast_NEG)
+{
+  auto *const mm = _memory_manager.get();
+
+  Tensor lhs_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, mm);
+  Tensor rhs_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, mm);
+  Tensor result_tensor = makeOutputTensor(DataType::BOOL);
+
+  NotEqual not_equal(&lhs_tensor, &rhs_tensor, &result_tensor);
+  ASSERT_ANY_THROW(not_equal.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/OneHot.cpp b/compiler/luci-interpreter/src/kernels/OneHot.cpp
new file mode 100644
index 000000000..4d3e5f2ef
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/OneHot.cpp
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/OneHot.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+
+/**
+ * Writes the one-hot expansion of `indices_tensor` into `output_tensor`.
+ *
+ * A new axis of length `depth` is inserted at position `axis` (-1 means after the last input
+ * axis). Along it, the element whose position equals the index value gets the first element of
+ * `on_value_tensor`; every other element gets the first element of `off_value_tensor`. Index
+ * values outside [0, depth) therefore produce only off values. Indices are read as int32.
+ */
+template <typename T>
+void OneHotComputeImpl(const Tensor *indices_tensor, const Tensor *on_value_tensor,
+                       const Tensor *off_value_tensor, int32_t depth, int32_t axis,
+                       Tensor *output_tensor)
+{
+  // define input shape and correct axis
+  auto const &input_shape = indices_tensor->shape();
+  axis = axis == -1 ? input_shape.num_dims() : axis;
+
+  // TODO support other integer input types
+  auto const *indices = getTensorData<int32_t>(indices_tensor);
+  auto const on_value = getTensorData<T>(on_value_tensor)[0];
+  auto const off_value = getTensorData<T>(off_value_tensor)[0];
+  auto *output = getTensorData<T>(output_tensor);
+
+  // prefix_dim_size == # of elements before the axis
+  // depth == # of elements per axis
+  // suffix_dim_size == # of elements after the axis
+  auto prefix_dim_size = 1;
+  for (int32_t i = 0; i < axis; ++i)
+  {
+    prefix_dim_size *= input_shape.dim(i);
+  }
+  // A zero-extent axis before `axis` means there is nothing to write. Return before the
+  // division below, because the assert is compiled out when NDEBUG is defined.
+  if (prefix_dim_size == 0)
+    return;
+  assert(prefix_dim_size > 0);
+  auto const suffix_dim_size = input_shape.num_elements() / prefix_dim_size;
+
+  // View the indices as a matrix of size:
+  //     prefix_dim_size x suffix_dim_size
+  // View the output as a matrix of size:
+  //     prefix_dim_size x depth x suffix_dim_size
+  // Then the output is:
+  //     output(i, j, k) == (indices(i, k) == j) ? on : off
+  for (int32_t i = 0; i < prefix_dim_size; ++i)
+    for (int32_t j = 0; j < depth; ++j)
+      for (int32_t k = 0; k < suffix_dim_size; ++k, ++output)
+        *output = indices[i * suffix_dim_size + k] == j ? on_value : off_value;
+}
+
+} // namespace
+
+// Registers the four inputs (indices, depth, on_value, off_value) and the single output with
+// the base kernel; no further set-up is needed here.
+OneHot::OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value,
+               const Tensor *off_value, Tensor *output, const OneHotParams &params)
+  : KernelWithParams<OneHotParams>({indices, depth, on_value, off_value}, {output}, params)
+{
+}
+
+// Validates the operand types and shapes, then resizes the output to the input shape with one
+// extra axis of length `depth` inserted at `axis`. The depth value is read from the depth
+// tensor's data here, so that data must already be available when configure() runs.
+void OneHot::configure()
+{
+  // check types
+  LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32);
+  LUCI_INTERPRETER_CHECK(depth()->element_type() == DataType::S32);
+  LUCI_INTERPRETER_CHECK(on_value()->element_type() == off_value()->element_type());
+  LUCI_INTERPRETER_CHECK(output()->element_type() == on_value()->element_type());
+
+  // check shape dependent parameters
+  LUCI_INTERPRETER_CHECK(on_value()->shape().num_elements() == 1);
+  LUCI_INTERPRETER_CHECK(off_value()->shape().num_elements() == 1);
+  LUCI_INTERPRETER_CHECK(depth()->shape().num_elements() == 1);
+  LUCI_INTERPRETER_CHECK(params().axis >= -1 && params().axis <= indices()->shape().num_dims());
+
+  // define parameters that affect the output shape
+  auto const depth_value = getTensorData<int32_t>(depth())[0];
+  // depth becomes the extent of the new axis, so it must not be negative
+  LUCI_INTERPRETER_CHECK(depth_value >= 0);
+  auto const &input_shape = indices()->shape();
+  auto const input_dims = input_shape.num_dims();
+  auto const axis = params().axis == -1 ? input_dims : params().axis;
+
+  // define output shape
+  Shape output_shape(input_shape.num_dims() + 1);
+  {
+    for (int32_t d = 0; d < axis; ++d)
+      output_shape.dim(d) = input_shape.dim(d);
+
+    output_shape.dim(axis) = depth_value;
+
+    for (int32_t d = axis + 1; d < output_shape.num_dims(); ++d)
+      output_shape.dim(d) = input_shape.dim(d - 1);
+  }
+
+  // reshape output
+  output()->resize(output_shape);
+}
+
+// Dispatches to the typed implementation according to the output element type; any type other
+// than FLOAT32, U8 and S16 raises std::runtime_error.
+void OneHot::execute() const
+{
+  const int32_t one_hot_depth = getTensorData<int32_t>(depth())[0];
+  const auto one_hot_axis = params().axis;
+
+  switch (output()->element_type())
+  {
+    case loco::DataType::FLOAT32:
+    {
+      OneHotComputeImpl<float>(indices(), on_value(), off_value(), one_hot_depth, one_hot_axis,
+                               output());
+      break;
+    }
+    case loco::DataType::U8:
+    {
+      OneHotComputeImpl<uint8_t>(indices(), on_value(), off_value(), one_hot_depth, one_hot_axis,
+                                 output());
+      break;
+    }
+    case loco::DataType::S16:
+    {
+      OneHotComputeImpl<int16_t>(indices(), on_value(), off_value(), one_hot_depth, one_hot_axis,
+                                 output());
+      break;
+    }
+    default:
+      // TODO Support other data types
+      throw std::runtime_error("Not supported, yet!");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/OneHot.h b/compiler/luci-interpreter/src/kernels/OneHot.h
new file mode 100644
index 000000000..572f857ae
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/OneHot.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_ONEHOT_H
+#define LUCI_INTERPRETER_KERNELS_ONEHOT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// OneHot kernel. Inputs are stored in the order indices, depth, on_value, off_value; there is
+// a single output.
+class OneHot : public KernelWithParams<OneHotParams>
+{
+public:
+  OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value,
+         const Tensor *off_value, Tensor *output, const OneHotParams &params);
+
+  void configure() override;
+  void execute() const override;
+
+  // Input accessors, in constructor order.
+  const Tensor *indices() const { return _inputs[0]; }
+  const Tensor *depth() const { return _inputs[1]; }
+  const Tensor *on_value() const { return _inputs[2]; }
+  const Tensor *off_value() const { return _inputs[3]; }
+
+  // The only output tensor.
+  Tensor *output() const { return _outputs[0]; }
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_ONEHOT_H
diff --git a/compiler/luci-interpreter/src/kernels/OneHot.test.cpp b/compiler/luci-interpreter/src/kernels/OneHot.test.cpp
new file mode 100644
index 000000000..45b6968fa
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/OneHot.test.cpp
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/OneHot.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T1, typename T2> // T1: element type of the indices input; T2: element type of on/off values and of the output
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<T1> input_data, std::initializer_list<int32_t> depth_data,
+ std::initializer_list<T2> on_value_data, std::initializer_list<T2> off_value_data,
+ int32_t axis, std::initializer_list<T2> output_data)
+{ // Test helper: builds the tensors, runs one OneHot kernel and compares output shape and data with the expectations.
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ constexpr auto input_type = getElementType<T1>();
+ constexpr auto output_type = getElementType<T2>();
+
+ Tensor input_tensor = makeInputTensor<input_type>(input_shape, input_data, memory_manager.get());
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, depth_data, memory_manager.get()); // depth, on_value and off_value are all created with an empty shape ({})
+ Tensor on_value_tensor = makeInputTensor<output_type>({}, on_value_data, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<output_type>({}, off_value_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(output_type);
+
+ OneHotParams params{};
+ params.axis = axis;
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor); // done after configure(); presumably configure() sets the output shape first - confirm
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+ EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data));
+}
+
+template <typename T> class OneHotTest : public ::testing::Test // Empty fixture; exists only to carry the type parameter of the typed tests.
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int16_t>; // each type is used as the on/off/output element type (TypeParam) by the typed tests
+TYPED_TEST_SUITE(OneHotTest, DataTypes);
+
+TYPED_TEST(OneHotTest, BasicPattern) // Same 2x3 indices with depth 4, checked for axis 0, 1 and -1.
+{
+ // axis 0: expected output shape is {4, 2, 3}
+ Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{4, 2, 3},
+ /*input_data=*/
+ {
+ 0, 3, 5, //
+ 7, 3, 0, //
+ }, // indices 5 and 7 are not below depth 4; the expected output holds only off_value for them
+ /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
+ /*axis=*/0,
+ /*output_data=*/
+ {
+ 1, 0, 0, //
+ 0, 0, 1, //
+
+ 0, 0, 0, //
+ 0, 0, 0, //
+
+ 0, 0, 0, //
+ 0, 0, 0, //
+
+ 0, 1, 0, //
+ 0, 1, 0, //
+ });
+ // axis 1: expected output shape is {2, 4, 3}
+ Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 4, 3},
+ /*input_data=*/
+ {
+ 0, 3, 5, //
+ 7, 3, 0, //
+ },
+ /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
+ /*axis=*/1,
+ /*output_data=*/
+ {
+ 1, 0, 0, //
+ 0, 0, 0, //
+ 0, 0, 0, //
+ 0, 1, 0, //
+
+ 0, 0, 1, //
+ 0, 0, 0, //
+ 0, 0, 0, //
+ 0, 1, 0, //
+ });
+ // axis -1: expected output shape is {2, 3, 4}
+ Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3, 4},
+ /*input_data=*/
+ {
+ 0, 3, 5, //
+ 7, 3, 0, //
+ },
+ /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
+ /*axis=*/-1,
+ /*output_data=*/
+ {
+ 1, 0, 0, 0, //
+ 0, 0, 0, 1, //
+ 0, 0, 0, 0, //
+
+ 0, 0, 0, 0, //
+ 0, 0, 0, 1, //
+ 1, 0, 0, 0, //
+ });
+}
+
+TEST(OneHotTest, UnsupportedInputType_NEG) // configure() is expected to throw when the indices tensor is FLOAT32.
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ // input type should be integer; FLOAT32 is used here to trigger the failure
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {0}, memory_manager.get());
+
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());
+ Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ OneHotParams params = {-1}; // presumably initializes axis (first member of OneHotParams) - confirm
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(OneHotTest, OutputTypeMismatch_NEG) // configure() is expected to throw when the output type differs from the on/off value type.
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::S32>({1}, {0}, memory_manager.get());
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());
+
+ // type of on_value, off_value and output_tensor should be the same; here the output is S16 while on/off are FLOAT32
+ Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16);
+
+ OneHotParams params = {-1}; // presumably initializes axis (first member of OneHotParams) - confirm
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(OneHotTest, InvalidAxis_NEG) // configure() is expected to throw for an axis below -1.
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::S32>({1}, {0}, memory_manager.get());
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());
+ Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ // axis should be in [-1, input_shape.rank]; -2 lies outside that range
+ OneHotParams params = {-2};
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Pack.test.cpp b/compiler/luci-interpreter/src/kernels/Pack.test.cpp
index 90a0f894e..2404e4303 100644
--- a/compiler/luci-interpreter/src/kernels/Pack.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Pack.test.cpp
@@ -80,7 +80,7 @@ template <typename T> class PackTest : public ::testing::Test
};
using DataTypes = ::testing::Types<uint8_t, float>;
-TYPED_TEST_CASE(PackTest, DataTypes);
+TYPED_TEST_SUITE(PackTest, DataTypes);
TYPED_TEST(PackTest, ThreeInputs)
{
diff --git a/compiler/luci-interpreter/src/kernels/Pad.cpp b/compiler/luci-interpreter/src/kernels/Pad.cpp
index 700448e7a..fe172884b 100644
--- a/compiler/luci-interpreter/src/kernels/Pad.cpp
+++ b/compiler/luci-interpreter/src/kernels/Pad.cpp
@@ -93,6 +93,16 @@ void Pad::execute() const
getTensorData<uint8_t>(output()));
break;
}
+ case DataType::S8:
+ {
+ assert(output()->zero_point() >= std::numeric_limits<int8_t>::min());
+ assert(output()->zero_point() <= std::numeric_limits<int8_t>::max());
+ const auto pad_value = static_cast<int8_t>(output()->zero_point());
+ tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<int8_t>(input()),
+ &pad_value, getTensorShape(output()),
+ getTensorData<int8_t>(output()));
+ break;
+ }
default:
throw std::runtime_error("Unsupported type.");
}
diff --git a/compiler/luci-interpreter/src/kernels/Pad.test.cpp b/compiler/luci-interpreter/src/kernels/Pad.test.cpp
index 7994263e2..dd3ce947c 100644
--- a/compiler/luci-interpreter/src/kernels/Pad.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Pad.test.cpp
@@ -54,6 +54,32 @@ TEST(Pad, Uint8)
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 7, 1}));
}
+TEST(Pad, Int8) // Pads a quantized S8 {1, 3, 3, 1} tensor and compares the dequantized result with a float reference.
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+ std::pair<float, int32_t> quant_param = quantizationParams<int8_t>(-1.0f, 1.0f); // used below as (scale, zero_point)
+ std::vector<float> input_data{-0.2, 0.4, 0.5, -0.7, -0.1, -0.9, 0.7, 0.1, 0.2};
+ std::vector<int32_t> paddings_data{0, 0, 1, 2, 2, 1, 0, 0}; // four pairs, one per input dimension (shape {4, 2}); only the two middle dimensions are padded
+ Tensor input_tensor = makeInputTensor<DataType::S8>(
+ {1, 3, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, quant_param.first, quant_param.second); // same quantization parameters as the input
+
+ Pad kernel(&input_tensor, &paddings_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0, 0, 0, 0, 0, 0, 0, 0, -0.2, 0.4, 0.5, 0,
+ 0, 0, -0.7, -0.1, -0.9, 0, 0, 0, 0.7, 0.1, 0.2, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; // 36 values: padded positions are expected to dequantize to 0
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 6, 6, 1}));
+}
+
TEST(Pad, Float)
{
std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
diff --git a/compiler/luci-interpreter/src/kernels/Quantize.cpp b/compiler/luci-interpreter/src/kernels/Quantize.cpp
new file mode 100644
index 000000000..0c8544a65
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Quantize.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Quantize.h"
+#include "kernels/Utils.h"
+#include "PALQuantize.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+
+template <typename input_dtype> void call_requantize(const Tensor *input, Tensor *output) // Requantizes `input` (elements of type input_dtype) into `output`, dispatching on the output element type.
+{
+ int32_t multiplier;
+ int shift;
+
+ const double effective_output_scale = input->scale() / output->scale(); // ratio of input scale to output scale
+ quantizeMultiplier(effective_output_scale, &multiplier, &shift); // fills multiplier and shift from the scale ratio
+
+ const auto input_shape = getTensorShape(input);
+ const auto output_shape = getTensorShape(output);
+ const auto size = tflite::MatchingFlatSize(input_shape, output_shape); // element count passed to Requantize
+
+ const auto input_data = getTensorData<input_dtype>(input);
+
+ switch (output->element_type()) // the three cases differ only in the output data pointer type
+ {
+ case loco::DataType::S8:
+ luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+ output->zero_point(), getTensorData<int8_t>(output));
+ break;
+ case loco::DataType::U8:
+ luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+ output->zero_point(), getTensorData<uint8_t>(output));
+ break;
+ case loco::DataType::S16:
+ luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+ output->zero_point(), getTensorData<int16_t>(output));
+ break;
+ default: // any other output element type is rejected
+ throw std::runtime_error("Unsupported quantized type, yet!");
+ }
+}
+
+} // namespace
+
+Quantize::Quantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} // Passes `input` as the single input and `output` as the single output to the base Kernel.
+
+void Quantize::configure() // Validates the input/output type combination and resizes the output to the input shape.
+{
+
+ if (input()->element_type() == loco::DataType::S16) // an S16 input must have zero point 0
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0);
+
+ switch (input()->element_type())
+ {
+ case loco::DataType::FLOAT32: // float input: output must be U8, S8 or S16
+ {
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::U8 ||
+ output()->element_type() == loco::DataType::S8 ||
+ output()->element_type() == loco::DataType::S16); // NOTE(review): unlike the branch below, an S16 output zero point is not checked here - confirm this is intended
+ break;
+ }
+ case loco::DataType::S16:
+ case loco::DataType::S8:
+ case loco::DataType::U8: // quantized input: output must be S8, U8 or S16
+ {
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::S8 ||
+ output()->element_type() == loco::DataType::U8 ||
+ output()->element_type() == loco::DataType::S16);
+ if (output()->element_type() == loco::DataType::S16) // an S16 output must have zero point 0
+ {
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == 0);
+ }
+ break;
+ }
+ default: // every other input element type is rejected
+ throw std::runtime_error("Unsupported type");
+ }
+
+ output()->resize(input()->shape()); // output takes the input shape
+}
+
+void Quantize::execute() const // Dispatches on the input type: float inputs are quantized, S16/S8/U8 inputs are requantized.
+{
+ switch (input()->element_type())
+ {
+ case loco::DataType::FLOAT32:
+ {
+ tflite::QuantizationParams op_params; // carries the output tensor's zero point and scale
+ op_params.zero_point = output()->zero_point();
+ op_params.scale = output()->scale();
+ const auto input_data = getTensorData<float>(input());
+
+ switch (output()->element_type()) // the three cases differ only in the output data pointer type
+ {
+ case loco::DataType::S8:
+ {
+ luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+ getTensorShape(output()), getTensorData<int8_t>(output()));
+ break;
+ }
+ case loco::DataType::U8:
+ {
+ luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+ getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ }
+ case loco::DataType::S16:
+ {
+ luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+ getTensorShape(output()),
+ getTensorData<int16_t>(output()));
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+ break;
+ }
+ case loco::DataType::S16: // quantized inputs go through call_requantize with the matching element type
+ {
+ call_requantize<int16_t>(input(), output());
+ break;
+ }
+ case loco::DataType::S8:
+ {
+ call_requantize<int8_t>(input(), output());
+ break;
+ }
+ case loco::DataType::U8:
+ {
+ call_requantize<uint8_t>(input(), output());
+ break;
+ }
+ default: // every other input element type is rejected
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Quantize.h b/compiler/luci-interpreter/src/kernels/Quantize.h
new file mode 100644
index 000000000..006c5366f
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Quantize.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_QUANTIZE_H
+#define LUCI_INTERPRETER_KERNELS_QUANTIZE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Quantize : public Kernel // Quantize kernel interface: one input tensor, one output tensor, no parameter struct.
+{
+public:
+ Quantize(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; } // input slot 0
+ Tensor *output() const { return _outputs[0]; } // output slot 0
+
+ void configure() override; // declared only; implemented outside this header
+ void execute() const override; // declared only; implemented outside this header
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_QUANTIZE_H
diff --git a/compiler/luci-interpreter/src/kernels/Quantize.test.cpp b/compiler/luci-interpreter/src/kernels/Quantize.test.cpp
new file mode 100644
index 000000000..22e67fe3f
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Quantize.test.cpp
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Quantize.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class QuantizeTest : public ::testing::Test // Fixture that provides a TestMemoryManager to every Quantize test.
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } // a new manager is created in each SetUp() call
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(QuantizeTest, FloatUint8) // FLOAT32 -> U8 with scale 0.5 and zero point 127.
+{
+ std::vector<float> input_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+ std::vector<uint8_t> ref_output_data{0, 1, 2, 3, 4, 251, 252, 253, 254, 255}; // equals input / 0.5 + 127 for each element
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, /*scale*/ 0.5, /*zero_point*/ 127);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(QuantizeTest, FloatInt8) // FLOAT32 -> S8 with scale 0.5 and zero point -1.
+{
+ std::vector<float> input_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+ std::vector<int8_t> ref_output_data{-128, -127, -126, -125, -124, 123, 124, 125, 126, 127}; // equals input / 0.5 - 1 for each element
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(QuantizeTest, FloatInt16) // FLOAT32 -> S16 with scale 0.005 and zero point 0.
+{
+ std::vector<float> input_data{-63.5, -63, -3, -2, -1, 1, 2, 3, 63.5, 64};
+
+ std::vector<int16_t> ref_output_data{-12700, -12600, -600, -400, -200,
+ 200, 400, 600, 12700, 12800}; // equals input / 0.005 for each element
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, /*scale*/ 0.005, /*zero_point*/ 0);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int16_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(QuantizeTest, Int16Int16) // S16 (scale 1.0) -> S16 (scale 0.5), both with zero point 0.
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; // given as floats; presumably quantized by makeInputTensor with the scale/zero point below - confirm
+
+ std::vector<int16_t> ref_output_data{2, 4, 6, 8, 10, 12, 14, 16, 18, 20}; // each input value divided by the output scale 0.5
+
+ Tensor input_tensor = makeInputTensor<DataType::S16>(
+ {1, 1, 2, 5}, /*scale*/ 1.0, /*zero_point*/ 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, /*scale*/ 0.5, /*zero_point*/ 0);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int16_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, Int8Int8) // S8 -> S8 where input and output share scale 0.5 and zero point -1.
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; // given as floats; presumably quantized by makeInputTensor with the scale/zero point below - confirm
+
+ std::vector<int8_t> ref_output_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 19}; // equals input / 0.5 - 1 for each element
+
+ Tensor input_tensor = makeInputTensor<DataType::S8>(
+ {1, 1, 2, 5}, /*scale*/ 0.5, /*zero_point*/ -1, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, Uint8Uint8) // U8 -> U8 where input and output share scale 0.5 and zero point 127.
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; // given as floats; presumably quantized by makeInputTensor with the scale/zero point below - confirm
+
+ std::vector<uint8_t> ref_output_data{129, 131, 133, 135, 137, 139, 141, 143, 145, 147}; // equals input / 0.5 + 127 for each element
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 2, 5}, /*scale*/ 0.5, /*zero_point*/ 127, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, /*scale*/ 0.5, /*zero_point*/ 127);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, Int16Int8) // S16 (scale 1.0, zero point 0) -> S8 (scale 0.5, zero point -1).
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; // given as floats; presumably quantized by makeInputTensor with the scale/zero point below - confirm
+
+ std::vector<int8_t> ref_output_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 19}; // equals input / 0.5 - 1 for each element
+
+ Tensor input_tensor = makeInputTensor<DataType::S16>(
+ {1, 1, 2, 5}, /*scale*/ 1.0, /*zero_point*/ 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, InvalidInputType_NEG) // configure() is expected to throw for an S32 input.
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get()); // S32 is the unsupported input type under test
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForFloatInput_NEG) // configure() is expected to throw for FLOAT32 input with FLOAT32 output.
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); // FLOAT32 is the invalid output type under test
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForInt16Input_NEG) // configure() is expected to throw for S16 input with FLOAT32 output.
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); // FLOAT32 is the invalid output type under test
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForInt8Input_NEG) // configure() is expected to throw for S8 input with FLOAT32 output.
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); // FLOAT32 is the invalid output type under test
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForUint8Input_NEG) // configure() is expected to throw for U8 input with S32 output.
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32); // S32 is the invalid output type under test
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidInputZeroPoint_NEG) // configure() is expected to throw for an S16 input whose zero point is not 0.
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 2, 5}, 0.5, -1, input_data, _memory_manager.get()); // zero point -1 is the invalid value under test
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp b/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp
index 7af20f8c4..933a1128c 100644
--- a/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp
@@ -90,7 +90,7 @@ template <typename T> class ResizeBilinearTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(ResizeBilinearTest, DataTypes);
+TYPED_TEST_SUITE(ResizeBilinearTest, DataTypes);
TYPED_TEST(ResizeBilinearTest, SimpleTest)
{
diff --git a/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp
index 0e9017c78..7ade02a6f 100644
--- a/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp
@@ -92,7 +92,7 @@ template <typename T> class ResizeNearestNeighborTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(ResizeNearestNeighborTest, DataTypes);
+TYPED_TEST_SUITE(ResizeNearestNeighborTest, DataTypes);
TYPED_TEST(ResizeNearestNeighborTest, SimpleTest)
{
diff --git a/compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp b/compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp
index 2bd94875b..c0025faca 100644
--- a/compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/ReverseV2.test.cpp
@@ -33,7 +33,7 @@ template <typename T> class ReverseV2Test : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(ReverseV2Test, DataTypes);
+TYPED_TEST_SUITE(ReverseV2Test, DataTypes);
TYPED_TEST(ReverseV2Test, MultiDimensions)
{
diff --git a/compiler/luci-interpreter/src/kernels/SVDF.cpp b/compiler/luci-interpreter/src/kernels/SVDF.cpp
new file mode 100644
index 000000000..40d79aaa3
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/SVDF.cpp
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SVDF.h"
+#include "kernels/Utils.h"
+#include "PALSVDF.h"
+
+#include <tensorflow/lite/kernels/internal/quantization_util.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+TfLiteFusedActivation get_tflite_activation(Activation activation) // Maps a fused activation value to the matching kTfLiteAct* constant; throws std::runtime_error for any unlisted value.
+{
+ switch (activation) // cases are spelled as luci::FusedActFunc values; presumably Activation aliases that enum - confirm
+ {
+ case luci::FusedActFunc::RELU:
+ return kTfLiteActRelu;
+ case luci::FusedActFunc::RELU6:
+ return kTfLiteActRelu6;
+ case luci::FusedActFunc::RELU_N1_TO_1:
+ return kTfLiteActReluN1To1;
+ case luci::FusedActFunc::TANH:
+ return kTfLiteActTanh;
+ case luci::FusedActFunc::SIGN_BIT:
+ return kTfLiteActSignBit;
+ case luci::FusedActFunc::NONE:
+ return kTfLiteActNone;
+ default: // any value not listed above
+ throw std::runtime_error("Unsupported activation type");
+ }
+}
+} // namespace
+
+SVDF::SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time,
+ const Tensor *bias, const Tensor *input_activation_state, Tensor *output,
+ Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2,
+ Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6,
+ const SVDFParams &params)
+ : KernelWithParams<SVDFParams>({input, weight_feature, weight_time, bias, input_activation_state}, // five inputs, in this order
+ {output, scratchpad_activation_state, scratchpad_1, scratchpad_2,
+ scratchpad_3, scratchpad_4, scratchpad_5, scratchpad_6}, // eight outputs: output, the state scratchpad, then scratchpad_1..6
+ params)
+{
+ // Nothing to do here: all tensors and params are forwarded to the base class above.
+}
+
+void SVDF::configure() // Validates tensor types and shapes, then resizes the output, the state scratchpad and the PAL scratchpads.
+{
+ const Shape &input_shape = input()->shape();
+ const Shape &weight_features_shape = weight_feature()->shape();
+ const Shape &weight_time_shape = weight_time()->shape();
+
+ // Validate Input Tensor:
+ LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::FLOAT32 ||
+ input()->element_type() == loco::DataType::S8);
+ LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 2);
+
+ // Validate inputs and output types
+ if (input()->element_type() == loco::DataType::S8) // fully integer path
+ {
+ LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == loco::DataType::S8);
+ LUCI_INTERPRETER_CHECK(weight_time()->element_type() == loco::DataType::S16 ||
+ weight_time()->element_type() == loco::DataType::S8);
+ if (bias()) // bias is optional
+ LUCI_INTERPRETER_CHECK(bias()->element_type() == loco::DataType::S32);
+
+ LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == loco::DataType::S16 ||
+ input_activation_state()->element_type() == loco::DataType::S8);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::S8);
+
+ // Note: tflite currently supports only ReLU activation for integer SVDF
+ LUCI_INTERPRETER_CHECK(params().activation == luci::FusedActFunc::RELU);
+ }
+ else if (weight_feature()->element_type() == loco::DataType::FLOAT32) // float path; input is FLOAT32 here
+ {
+ LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == loco::DataType::FLOAT32); // already implied by the branch condition
+ LUCI_INTERPRETER_CHECK(weight_time()->element_type() == loco::DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == loco::DataType::FLOAT32);
+ if (bias())
+ LUCI_INTERPRETER_CHECK(bias()->element_type() == loco::DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::FLOAT32);
+ }
+ else if ((weight_feature()->element_type() == loco::DataType::U8 ||
+ weight_feature()->element_type() == loco::DataType::S8) &&
+ input()->element_type() == loco::DataType::FLOAT32)
+ {
+ // TODO: support hybrid SVDF op
+ throw std::runtime_error("Hybrid type is not currently supported");
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported type.");
+ }
+
+ // Validate weight tensor ranks first: every dim() access below relies on them.
+ LUCI_INTERPRETER_CHECK(weight_features_shape.num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(weight_time_shape.num_dims() == 2);
+ // Check that the tensor parameters agree with each other and with the input configuration.
+ const int rank = params().svdf_rank;
+ const int batch_size = input_shape.dim(0);
+ const int num_filters = weight_features_shape.dim(0);
+ LUCI_INTERPRETER_CHECK(rank != 0); // guards the modulo and division below
+ LUCI_INTERPRETER_CHECK(num_filters % rank == 0);
+
+ const int num_units = num_filters / rank;
+ const int memory_size = weight_time_shape.dim(1);
+
+ // Validate Weight_Feature Input Tensor:
+ LUCI_INTERPRETER_CHECK(weight_features_shape.dim(1) == input_shape.dim(1));
+
+ // Validate Weight_Time Input Tensor:
+ LUCI_INTERPRETER_CHECK(weight_time_shape.dim(0) == num_filters);
+
+ // Validate Bias
+ if (bias())
+ LUCI_INTERPRETER_CHECK(bias()->shape().dim(0) == num_units);
+
+ // Validate Input Activation State
+ LUCI_INTERPRETER_CHECK(input_activation_state()->shape().num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(0) == batch_size);
+ LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(1) == memory_size * num_filters);
+
+ // Resize scratchpad_state to input_activation_state
+ auto scratchpad_activation_state = getOutputTensors()[1];
+ scratchpad_activation_state->resize({batch_size, memory_size * num_filters});
+
+ // Resize output tensor
+ output()->resize({batch_size, num_units});
+
+ luci_interpreter_pal::SetupScratchpadTensor( // output slots 2..7 are scratchpad_1..scratchpad_6
+ input()->element_type(), weight_feature()->element_type(), getOutputTensors()[2],
+ getOutputTensors()[3], getOutputTensors()[4], getOutputTensors()[5], getOutputTensors()[6],
+ getOutputTensors()[7], input_shape, weight_time_shape, batch_size, num_filters, num_units);
+}
+
+void SVDF::execute() const
+{
+  // The weight_feature element type decides which evaluation path runs.
+  const auto weights_type = weight_feature()->element_type();
+
+  if (weights_type == loco::DataType::FLOAT32)
+  {
+    evalFloat();
+    return;
+  }
+
+  if (weights_type != loco::DataType::S8)
+    throw std::runtime_error("Unsupported type");
+
+  // TODO: support hybrid SVDF op (S8 weights combined with a non-S8 input)
+  if (input()->element_type() != loco::DataType::S8)
+    throw std::runtime_error("Hybrid type is not currently supported");
+
+  evalInteger();
+}
+
+void SVDF::evalInteger() const
+{
+  // Real-valued rescale factors built from the tensor scales (feature stage, then time stage).
+  const auto feature_scale = static_cast<double>(input()->scale() * weight_feature()->scale() /
+                                                 input_activation_state()->scale());
+  const auto time_scale = static_cast<double>(input_activation_state()->scale() *
+                                              weight_time()->scale() / output()->scale());
+
+  // tflite::QuantizeMultiplier turns each factor into an int32 value plus an int.
+  int32_t feature_multiplier;
+  int feature_shift;
+  tflite::QuantizeMultiplier(feature_scale, &feature_multiplier, &feature_shift);
+  int32_t time_multiplier;
+  int time_shift;
+  tflite::QuantizeMultiplier(time_scale, &time_multiplier, &time_shift);
+
+  TfLiteSVDFParams tflite_params{};
+  tflite_params.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
+  tflite_params.rank = params().svdf_rank;
+  tflite_params.activation = get_tflite_activation(params().activation);
+
+  // Note (from the original author): the activation_state input variable tensor is expected to
+  // be reset to zero and to have no buffer; the state scratchpad (output #1) is zero-filled here.
+  auto state_tensor = getOutputTensors()[1];
+  auto state_data = getTensorData<int16_t>(state_tensor);
+  std::fill_n(state_data, state_tensor->shape().num_elements(), 0);
+
+  auto accum_tensor = getOutputTensors()[2];
+  auto temp_tensor = getOutputTensors()[3];
+  int32_t input_zp = input()->zero_point();
+  int32_t output_zp = output()->zero_point();
+  luci_interpreter_pal::IntegerSVDF(
+    tflite_params, getTensorShape(input()), getTensorData<int8_t>(input()),
+    getTensorShape(weight_feature()), getTensorData<int8_t>(weight_feature()),
+    getTensorShape(weight_time()), getTensorData<int16_t>(weight_time()), getTensorShape(bias()),
+    getTensorData<int32_t>(bias()), state_data, getTensorShape(output()),
+    getTensorData<int8_t>(output()), getTensorData<int32_t>(accum_tensor),
+    getTensorData<int32_t>(temp_tensor), feature_multiplier, feature_shift, time_multiplier,
+    time_shift, input_zp, output_zp);
+}
+
+void SVDF::evalFloat() const
+{
+  // Note (from the original author): the activation_state input variable tensor is expected to
+  // be reset to zero and to have no buffer; the state scratchpad (output #1) is zero-filled here.
+  auto state_tensor = getOutputTensors()[1];
+  auto state_data = getTensorData<float>(state_tensor);
+  std::fill_n(state_data, state_tensor->shape().num_elements(), 0);
+
+  auto work_tensor = getOutputTensors()[2];
+
+  TfLiteSVDFParams tflite_params{};
+  tflite_params.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
+  tflite_params.rank = params().svdf_rank;
+  tflite_params.activation = get_tflite_activation(params().activation);
+
+  luci_interpreter_pal::FloatSVDF(
+    tflite_params, getTensorShape(input()), getTensorData<float>(input()),
+    getTensorShape(weight_feature()), getTensorData<float>(weight_feature()),
+    getTensorShape(weight_time()), getTensorData<float>(weight_time()), getTensorShape(bias()),
+    getTensorData<float>(bias()), getTensorData<float>(work_tensor), state_data,
+    getTensorShape(output()), getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/SVDF.h b/compiler/luci-interpreter/src/kernels/SVDF.h
new file mode 100644
index 000000000..335a6cd8f
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/SVDF.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SVDF_H
+#define LUCI_INTERPRETER_KERNELS_SVDF_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SVDF : public KernelWithParams<SVDFParams> // SVDF kernel: float and full-integer (S8) paths
+{
+public:
+ SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time,
+ const Tensor *bias, const Tensor *input_activation_state, Tensor *output,
+ Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2,
+ Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6,
+ const SVDFParams &params); // NOTE(review): scratchpads presumably become outputs 1..7 in this order - confirm in SVDF.cpp
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *weight_feature() const { return _inputs[1]; } // its element type selects the eval path in execute()
+ const Tensor *weight_time() const { return _inputs[2]; }
+ const Tensor *bias() const { return _inputs[3]; } // may be null; configure() only validates it when present
+ const Tensor *input_activation_state() const { return _inputs[4]; }
+
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override; // validates types/shapes, resizes output and scratchpads
+ void execute() const override; // dispatches to evalFloat()/evalInteger(); throws std::runtime_error otherwise
+
+private:
+ void evalFloat() const; // FLOAT32 weight_feature
+ void evalInteger() const; // S8 weight_feature with S8 input
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SVDF_H
diff --git a/compiler/luci-interpreter/src/kernels/SVDF.test.cpp b/compiler/luci-interpreter/src/kernels/SVDF.test.cpp
new file mode 100644
index 000000000..82bd9b009
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/SVDF.test.cpp
@@ -0,0 +1,341 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SVDF.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class SVDFTest : public ::testing::Test // fixture shared by all SVDF kernel tests
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); } // fresh manager per test
+
+ std::unique_ptr<IMemoryManager> _memory_manager; // used to create input tensors and allocate outputs/scratchpads
+};
+
+TEST_F(SVDFTest, FullIntegerTest) // S8 input + S8 weight_feature: exercises the evalInteger() path
+{
+ const int32_t batches = 2;
+ const int32_t input_size = 3;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, input_size};
+ Shape weight_feature_shape{num_filters, input_size};
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape bias_shape{units};
+ Shape activation_state_shape{batches, memory_size * num_filters}; // the shape configure() requires for the state input
+
+ std::vector<float> input_data{0.49837467, 0.19278903, 0.26584083,
+ 0.17660543, 0.52949083, -0.77931279};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ std::vector<float> bias_data{-0.0976817, 0.15294972, 0.39635518, -0.02702999};
+
+ std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-1, 1); // pairs are passed below as (scale, zero point)
+ std::pair<float, int32_t> weight_feature_quant_param = quantizationParams<int8_t>(-0.5, 0.5);
+ std::pair<float, int32_t> weight_time_quant_param = quantizationParams<int16_t>(-1, 1);
+ std::pair<float, int32_t> bias_quant_param = quantizationParams<int32_t>(-512, 512);
+ std::pair<float, int32_t> activation_state_quant_param = quantizationParams<int16_t>(-16, 16);
+
+ std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-0.5, 0.5);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor weight_feature_tensor = makeInputTensor<DataType::S8>(
+ weight_feature_shape, weight_feature_quant_param.first, weight_feature_quant_param.second,
+ weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor = makeInputTensor<DataType::S16>(
+ weight_time_shape, weight_time_quant_param.first, weight_time_quant_param.second,
+ weight_time_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(
+ bias_shape, bias_quant_param.first, bias_quant_param.second, bias_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(
+ DataType::S16, activation_state_quant_param.first, activation_state_quant_param.second);
+ activation_state_tensor.resize(activation_state_shape); // shape only: this test never allocates a buffer for it
+ Tensor output_tensor =
+ makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+
+ Tensor scratchpad_activation_state(DataType::S16, Shape({}), {}, ""); // scratchpads start with an empty shape
+ Tensor scratchpad_1(DataType::S32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::S32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::RELU;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, &bias_tensor,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ kernel.configure(); // resizes the output and state scratchpad before they are allocated below
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad_activation_state);
+ _memory_manager->allocate_memory(scratchpad_1);
+ _memory_manager->allocate_memory(scratchpad_2);
+ _memory_manager->allocate_memory(scratchpad_3);
+ _memory_manager->allocate_memory(scratchpad_4);
+ _memory_manager->allocate_memory(scratchpad_5);
+ _memory_manager->allocate_memory(scratchpad_6);
+ kernel.execute();
+
+ std::vector<int8_t> ref_output_data{-9, 24, 31, 1, -10, 10, -3, 0}; // expected raw int8 output values
+
+ std::vector<int32_t> ref_output_shape{batches, units};
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor), ref_output_data);
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(SVDFTest, FloatTest)
+{
+  // Float path (FLOAT32 weights, no bias): checks output values and the {batches, units} shape.
+  const int32_t batches = 2;
+  const int32_t input_size = 3;
+  const int32_t units = 4;
+  const int32_t memory_size = 10;
+  const int32_t rank = 1;
+  const int32_t num_filters = units * rank;
+
+  Shape input_shape{batches, input_size};
+  Shape weight_feature_shape{num_filters, input_size};
+  Shape weight_time_shape{num_filters, memory_size};
+  Shape activation_state_shape{batches, memory_size * num_filters};
+
+  std::vector<float> input_data{0.12609188, -0.46347019, -0.89598465,
+                                0.35867718, 0.36897406,  0.73463392};
+
+  std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667,   0.37613347,
+                                         0.22197971,  0.12416199,  0.27901134,  0.27557442,
+                                         0.3905206,   -0.36137494, -0.06634006, -0.10640851};
+
+  std::vector<float> weight_time_data{
+    -0.31930989, 0.37613347,  0.27901134,  -0.36137494, -0.36118156,
+    0.22197971,  0.27557442,  -0.06634006, 0.0079667,   0.12416199,
+
+    0.3905206,   -0.10640851, -0.0976817,  0.15294972,  0.39635518,
+    -0.02702999, 0.39296314,  0.15785322,  0.21931258,  0.31053296,
+
+    -0.36916667, 0.38031587,  -0.21580373, 0.27072677,  0.23622236,
+    0.34936687,  0.18174365,  0.35907319,  -0.17493086, 0.324846,
+
+    -0.10781813, 0.27201805,  0.14324132,  -0.23681851, -0.27115166,
+    -0.01580888, -0.14943552, 0.15465137,  0.09784451,  -0.0337657};
+
+  Tensor input_tensor =
+    makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+  Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>(
+    weight_feature_shape, weight_feature_data, _memory_manager.get());
+  Tensor weight_time_tensor =
+    makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get());
+  Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+  activation_state_tensor.resize(activation_state_shape);
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+  Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+  SVDFParams params{};
+  params.activation = Activation::NONE;
+  params.asymmetric_quantize_inputs = false;
+  params.svdf_rank = rank;
+  // The bias input is optional for the kernel; this test passes nullptr for it.
+  SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+              &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+              &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+  kernel.configure();
+  _memory_manager->allocate_memory(output_tensor);
+  _memory_manager->allocate_memory(scratchpad_activation_state);
+  _memory_manager->allocate_memory(scratchpad_1);
+  _memory_manager->allocate_memory(scratchpad_2);
+  _memory_manager->allocate_memory(scratchpad_3);
+  _memory_manager->allocate_memory(scratchpad_4);
+  _memory_manager->allocate_memory(scratchpad_5);
+  _memory_manager->allocate_memory(scratchpad_6);
+  kernel.execute();
+
+  std::vector<float> ref_output_data{0.014899,    -0.0517661, -0.143725, -0.00271883,
+                                     -0.03004015, 0.09565311, 0.1587342, 0.00784263};
+  // Dimensions are integers: use int32_t, as FullIntegerTest does, rather than float.
+  std::vector<int32_t> ref_output_shape{batches, units};
+  const float tolerance = 1e-5;
+  EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(SVDFTest, Unsupported_Type_Configure_NEG) // negative test: configure() is expected to throw
+{
+ const int32_t batches = 2;
+ const int32_t input_size = 3;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, input_size};
+ Shape weight_feature_shape{num_filters, input_size};
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape activation_state_shape{batches, memory_size * num_filters};
+
+ std::vector<int32_t> input_data{0, 1, 3, 4, 4, -2};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get()); // S32 input: the type this test expects to be rejected
+ Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>(
+ weight_feature_shape, weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor =
+ makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+ activation_state_tensor.resize(activation_state_shape);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::NONE;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ EXPECT_ANY_THROW(kernel.configure()); // nothing is allocated or executed: the failure must come from configure()
+}
+
+TEST_F(SVDFTest, Invalid_Input_Shape_NEG) // negative test: input width disagrees with weight_feature width
+{
+ const int32_t batches = 2;
+ const int32_t right_input_size = 3;
+ const int32_t wrong_input_size = 4;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, wrong_input_size}; // dim(1) == 4 ...
+ Shape weight_feature_shape{num_filters, right_input_size}; // ... vs dim(1) == 3; configure() checks these for equality
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape activation_state_shape{batches, memory_size * num_filters};
+
+ std::vector<float> input_data{0, 1, 3, 2, 4, 4, -2, 1};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>(
+ weight_feature_shape, weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor =
+ makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+ activation_state_tensor.resize(activation_state_shape);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::NONE;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ EXPECT_ANY_THROW(kernel.configure()); // nothing is allocated or executed: the failure must come from configure()
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Slice.cpp b/compiler/luci-interpreter/src/kernels/Slice.cpp
index 37a834a18..2fe2c5471 100644
--- a/compiler/luci-interpreter/src/kernels/Slice.cpp
+++ b/compiler/luci-interpreter/src/kernels/Slice.cpp
@@ -139,6 +139,11 @@ void Slice::execute() const
getTensorData<uint8_t>(input()), getTensorShape(output()),
getTensorData<uint8_t>(output()));
break;
+ case DataType::S8:
+ luci_interpreter_pal::Slice(op_params, getTensorShape(input()),
+ getTensorData<int8_t>(input()), getTensorShape(output()),
+ getTensorData<int8_t>(output()));
+ break;
default:
throw std::runtime_error("Unsupported input type.");
}
diff --git a/compiler/luci-interpreter/src/kernels/Slice.test.cpp b/compiler/luci-interpreter/src/kernels/Slice.test.cpp
index 3e0d0b0d7..517982990 100644
--- a/compiler/luci-interpreter/src/kernels/Slice.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Slice.test.cpp
@@ -31,8 +31,8 @@ template <typename T> class SliceTest : public ::testing::Test
{
};
-using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(SliceTest, DataTypes);
+using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
+TYPED_TEST_SUITE(SliceTest, DataTypes);
TYPED_TEST(SliceTest, SimpleTest)
{
diff --git a/compiler/luci-interpreter/src/kernels/Softmax.test.cpp b/compiler/luci-interpreter/src/kernels/Softmax.test.cpp
index 9de40b6ec..08e70672d 100644
--- a/compiler/luci-interpreter/src/kernels/Softmax.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Softmax.test.cpp
@@ -93,7 +93,7 @@ template <typename T> class SoftmaxTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
-TYPED_TEST_CASE(SoftmaxTest, DataTypes);
+TYPED_TEST_SUITE(SoftmaxTest, DataTypes);
TYPED_TEST(SoftmaxTest, Simple)
{
diff --git a/compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp b/compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp
index e06501c8c..3a8b0a812 100644
--- a/compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp
@@ -90,7 +90,7 @@ template <typename T> class SpaceToBatchNDTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(SpaceToBatchNDTest, DataTypes);
+TYPED_TEST_SUITE(SpaceToBatchNDTest, DataTypes);
TYPED_TEST(SpaceToBatchNDTest, Simple)
{
diff --git a/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp b/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp
index 735c010b9..4af488618 100644
--- a/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp
@@ -32,7 +32,7 @@ template <typename T> class SpaceToDepthTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(SpaceToDepthTest, DataTypes);
+TYPED_TEST_SUITE(SpaceToDepthTest, DataTypes);
TYPED_TEST(SpaceToDepthTest, SimpleCase)
{
diff --git a/compiler/luci-interpreter/src/kernels/Split.test.cpp b/compiler/luci-interpreter/src/kernels/Split.test.cpp
index 74d57aed3..283cd9aa9 100644
--- a/compiler/luci-interpreter/src/kernels/Split.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Split.test.cpp
@@ -73,7 +73,7 @@ template <typename T> class SplitTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(SplitTest, DataTypes);
+TYPED_TEST_SUITE(SplitTest, DataTypes);
TYPED_TEST(SplitTest, FourDimensional)
{
diff --git a/compiler/luci-interpreter/src/kernels/SplitV.test.cpp b/compiler/luci-interpreter/src/kernels/SplitV.test.cpp
index aac0567d7..035bc2122 100644
--- a/compiler/luci-interpreter/src/kernels/SplitV.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/SplitV.test.cpp
@@ -77,7 +77,7 @@ template <typename T> class SplitVTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t, int16_t>;
-TYPED_TEST_CASE(SplitVTest, DataTypes);
+TYPED_TEST_SUITE(SplitVTest, DataTypes);
TYPED_TEST(SplitVTest, ThreeDimensional)
{
diff --git a/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp b/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp
index d3326fe98..1bc0b6459 100644
--- a/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp
@@ -56,7 +56,7 @@ template <typename T> class SqueezeTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(SqueezeTest, DataTypes);
+TYPED_TEST_SUITE(SqueezeTest, DataTypes);
TYPED_TEST(SqueezeTest, TotalTest)
{
diff --git a/compiler/luci-interpreter/src/kernels/Sub.cpp b/compiler/luci-interpreter/src/kernels/Sub.cpp
index 603c62d0f..24b6a72e5 100644
--- a/compiler/luci-interpreter/src/kernels/Sub.cpp
+++ b/compiler/luci-interpreter/src/kernels/Sub.cpp
@@ -37,6 +37,7 @@ Sub::Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubPa
void Sub::configure()
{
LUCI_INTERPRETER_CHECK(!(input1()->element_type() != input2()->element_type()))
+ LUCI_INTERPRETER_CHECK(!(input1()->element_type() != output()->element_type()))
output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
}
@@ -47,6 +48,12 @@ void Sub::execute() const
case DataType::FLOAT32:
evalFloat();
break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
case DataType::U8:
evalQuantized();
break;
@@ -57,13 +64,8 @@ void Sub::execute() const
void Sub::evalFloat() const
{
- float activation_min{};
- float activation_max{};
- calculateActivationRange(_params.activation, &activation_min, &activation_max);
-
tflite::ArithmeticParams params{};
- params.float_activation_min = activation_min;
- params.float_activation_max = activation_max;
+ fillArithmeticActivationRange<float>(params, _params.activation);
const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
getTensorShape(input1()), getTensorShape(input2()), &params);
@@ -82,6 +84,28 @@ void Sub::evalFloat() const
}
}
+template <typename T> void Sub::evalInteger() const
+{
+  // Activation range for the fused activation, filled in for element type T.
+  tflite::ArithmeticParams op_params{};
+  fillArithmeticActivationRange<T>(op_params, _params.activation);
+
+  const auto lhs_shape = getTensorShape(input1());
+  const auto rhs_shape = getTensorShape(input2());
+  const auto out_shape = getTensorShape(output());
+  const T *lhs = getTensorData<T>(input1());
+  const T *rhs = getTensorData<T>(input2());
+  T *out = getTensorData<T>(output());
+
+  // Use the broadcasting kernel only when ProcessBroadcastShapes reports it is needed;
+  // op_params is passed by pointer to that call, as in the other Sub evaluation paths.
+  if (tflite::reference_ops::ProcessBroadcastShapes(lhs_shape, rhs_shape, &op_params))
+    tflite::reference_ops::BroadcastSubSlow(op_params, lhs_shape, lhs, rhs_shape, rhs, out_shape,
+                                            out);
+  else
+    tflite::reference_ops::Sub(op_params, lhs_shape, lhs, rhs_shape, rhs, out_shape, out);
+}
+
void Sub::evalQuantized() const
{
const auto input1_scale = static_cast<double>(input1()->scale());
diff --git a/compiler/luci-interpreter/src/kernels/Sub.h b/compiler/luci-interpreter/src/kernels/Sub.h
index d7940b5c6..23952b3bd 100644
--- a/compiler/luci-interpreter/src/kernels/Sub.h
+++ b/compiler/luci-interpreter/src/kernels/Sub.h
@@ -39,6 +39,7 @@ public:
private:
void evalFloat() const;
+ template <typename T> void evalInteger() const;
void evalQuantized() const;
};
diff --git a/compiler/luci-interpreter/src/kernels/Sub.test.cpp b/compiler/luci-interpreter/src/kernels/Sub.test.cpp
index c189f4481..9abafd49a 100644
--- a/compiler/luci-interpreter/src/kernels/Sub.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Sub.test.cpp
@@ -162,6 +162,51 @@ TEST_F(SubTest, Float)
}
}
+template <loco::DataType DType> void CheckInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+  // Runs Sub with a fused RELU on a base_shape input against every shape in test_shapes and
+  // compares each result with the matching entry of test_outputs (negative results clamp to 0).
+  using dtype = typename loco::DataTypeImpl<DType>::Type;
+  Shape base_shape = {2, 3, 1, 2};
+  std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+  std::vector<std::vector<dtype>> test_outputs = {
+    {0, 1, 2, 3, 0, 0, 0, 0, 4,  1, 0, 0, 0, 0, 7,  0, 3, 0,
+     0, 2, 4, 4, 0, 0, 3, 0, 10, 0, 6, 0, 3, 0, 10, 2, 6, 0},
+    {0, 1, 4, 1, 3, 0, 0, 2, 10, 0, 6, 0},
+    {0, 0, 0, 1, 2, 5, 0, 0, 0, 0, 4, 3, 0, 0, 3, 0, 7, 0,
+     2, 4, 0, 2, 0, 0, 8, 0, 6, 0, 1, 0, 8, 2, 6, 0, 1, 0},
+    {0, 0, 0, 0, 7, 0, 2, 4, 6, 0, 1, 0}};
+  std::vector<dtype> input1_data{-1, 2, 1, 0, 4, -5, 1, 3, 7, -1, 7, 1};
+  std::vector<dtype> input2_data{4, 1, -3, -1, 1, 6};
+  for (size_t i = 0; i < test_shapes.size(); ++i)
+  {
+    Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+    Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+    Tensor output_tensor = makeOutputTensor(DType);
+    SubParams params{};
+    params.activation = Activation::RELU;
+
+    Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+    kernel.configure();
+    memory_manager->allocate_memory(output_tensor);
+    kernel.execute();
+    EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+      << "With shape number " << i;
+  }
+}
+
+TEST_F(SubTest, SInt32) // runs the shared integer broadcast check with 32-bit signed elements
+{
+ CheckInteger<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED(); // NOTE(review): the real expectations live in CheckInteger; this looks redundant - confirm
+}
+
+TEST_F(SubTest, SInt64) // runs the shared integer broadcast check with 64-bit signed elements
+{
+ CheckInteger<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED(); // NOTE(review): the real expectations live in CheckInteger; this looks redundant - confirm
+}
+
TEST_F(SubTest, Input_Output_Type_NEG)
{
Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
@@ -175,11 +220,24 @@ TEST_F(SubTest, Input_Output_Type_NEG)
EXPECT_ANY_THROW(kernel.configure());
}
-TEST_F(SubTest, Invalid_Input_Type_NEG)
+TEST_F(SubTest, Invalid_Output_Type_NEG)
{
Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
- Tensor output_tensor = makeOutputTensor(DataType::S64);
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ SubParams params{};
+ params.activation = Activation::RELU;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(SubTest, Invalid_Input_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U64);
SubParams params{};
params.activation = Activation::RELU;
@@ -190,6 +248,19 @@ TEST_F(SubTest, Invalid_Input_Type_NEG)
EXPECT_ANY_THROW(kernel.execute());
}
+// Negative case: the two inputs use different integer types (S32 vs S64),
+// so configure() is expected to throw.
+TEST_F(SubTest, Mismatching_Input_Int_Types_NEG)
+{
+  auto *memory_manager = _memory_manager.get();
+  Tensor lhs = makeInputTensor<DataType::S32>({1}, {1}, memory_manager);
+  Tensor rhs = makeInputTensor<DataType::S64>({1}, {2}, memory_manager);
+  Tensor result = makeOutputTensor(DataType::S32);
+
+  SubParams sub_params{};
+  sub_params.activation = Activation::NONE;
+
+  Sub kernel(&lhs, &rhs, &result, sub_params);
+  EXPECT_ANY_THROW(kernel.configure());
+}
+
} // namespace
} // namespace kernels
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Transpose.test.cpp b/compiler/luci-interpreter/src/kernels/Transpose.test.cpp
index 107179910..43be8f8b9 100644
--- a/compiler/luci-interpreter/src/kernels/Transpose.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Transpose.test.cpp
@@ -52,7 +52,7 @@ template <typename T> class TransposeTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(TransposeTest, DataTypes);
+TYPED_TEST_SUITE(TransposeTest, DataTypes);
TYPED_TEST(TransposeTest, Small3D)
{
diff --git a/compiler/luci-interpreter/src/kernels/Unpack.test.cpp b/compiler/luci-interpreter/src/kernels/Unpack.test.cpp
index 4f22c9f30..9384ddc83 100644
--- a/compiler/luci-interpreter/src/kernels/Unpack.test.cpp
+++ b/compiler/luci-interpreter/src/kernels/Unpack.test.cpp
@@ -75,7 +75,7 @@ template <typename T> class UnpackTest : public ::testing::Test
};
using DataTypes = ::testing::Types<float, uint8_t>;
-TYPED_TEST_CASE(UnpackTest, DataTypes);
+TYPED_TEST_SUITE(UnpackTest, DataTypes);
TYPED_TEST(UnpackTest, ThreeOutputs)
{
diff --git a/compiler/luci-interpreter/src/kernels/Utils.cpp b/compiler/luci-interpreter/src/kernels/Utils.cpp
index 586cfa1e1..5d8e5db83 100644
--- a/compiler/luci-interpreter/src/kernels/Utils.cpp
+++ b/compiler/luci-interpreter/src/kernels/Utils.cpp
@@ -27,17 +27,18 @@ namespace luci_interpreter
namespace kernels
{
-void calculateActivationRange(Activation activation, float *activation_min, float *activation_max)
+template <typename T>
+void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
{
switch (activation)
{
case Activation::NONE:
- *activation_min = std::numeric_limits<float>::lowest();
- *activation_max = std::numeric_limits<float>::max();
+ *activation_min = std::numeric_limits<T>::lowest();
+ *activation_max = std::numeric_limits<T>::max();
break;
case Activation::RELU:
*activation_min = 0;
- *activation_max = std::numeric_limits<float>::max();
+ *activation_max = std::numeric_limits<T>::max();
break;
case Activation::RELU_N1_TO_1:
*activation_min = -1;
@@ -52,6 +53,13 @@ void calculateActivationRange(Activation activation, float *activation_min, floa
}
}
+template void calculateActivationRange(Activation activation, float *activation_min,
+ float *activation_max);
+template void calculateActivationRange(Activation activation, int32_t *activation_min,
+ int32_t *activation_max);
+template void calculateActivationRange(Activation activation, int64_t *activation_min,
+ int64_t *activation_max);
+
static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax,
const Tensor *output, int32_t *activation_min,
int32_t *activation_max)
@@ -175,7 +183,11 @@ Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_
{
const int32_t input1_dim = i < num_input1_dims ? input1_shape.dim(num_input1_dims - i - 1) : 1;
const int32_t input2_dim = i < num_input2_dims ? input2_shape.dim(num_input2_dims - i - 1) : 1;
- assert(input1_dim == input2_dim || input1_dim == 1 || input2_dim == 1);
+
+ bool need_broadcast = input1_dim != input2_dim;
+ bool can_broadcast = input1_dim == 1 || input2_dim == 1;
+ LUCI_INTERPRETER_CHECK(!need_broadcast || can_broadcast);
+
output_shape.dim(num_out_dims - i - 1) = std::max(input1_dim, input2_dim);
}
diff --git a/compiler/luci-interpreter/src/kernels/Utils.h b/compiler/luci-interpreter/src/kernels/Utils.h
index 817a42f83..ebeb20e66 100644
--- a/compiler/luci-interpreter/src/kernels/Utils.h
+++ b/compiler/luci-interpreter/src/kernels/Utils.h
@@ -76,11 +76,42 @@ inline int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2
return ((d0 * shape.dim(1) + d1) * shape.dim(2) + d2) * shape.dim(3) + d3;
}
-void calculateActivationRange(Activation activation, float *activation_min, float *activation_max);
+template <typename T>
+void calculateActivationRange(Activation activation, T *activation_min, T *activation_max);
void calculateActivationRangeQuantized(Activation activation, const Tensor *output,
int32_t *activation_min, int32_t *activation_max);
+// Recursion terminator: the list of candidate types is empty, so nothing can match T
+template <typename T> constexpr bool one_of_types() { return false; }
+
+// Checks if T is equal to one of {Head, Tail...} types
+template <typename T, typename Head, typename... Tail> constexpr bool one_of_types()
+{
+  return std::is_same<T, Head>::value ? true : one_of_types<T, Tail...>();
+}
+
+/**
+ * Fills the activation min/max fields of tflite::ArithmeticParams that match the data type T
+ *
+ * Exactly one pair of fields is written: float_activation_* for float,
+ * quantized_activation_* for int32_t and int64_activation_* for int64_t.
+ * T is a template parameter, so every condition below is a compile-time constant and the
+ * optimizer can drop the branches that do not apply.
+ *
+ * @tparam T data type of arithmetic operation output tensor (float, int32_t or int64_t)
+ * @param p tflite params to fill
+ * @param act luci_interpreter::Activation of arithmetic operation
+ */
+template <typename T>
+void fillArithmeticActivationRange(tflite::ArithmeticParams &p, Activation act)
+{
+  static_assert(one_of_types<T, float, int32_t, int64_t>(), "Unsupported dtype");
+
+  if (std::is_same<T, float>::value)
+    calculateActivationRange(act, &p.float_activation_min, &p.float_activation_max);
+  // The chain must be exclusive: without this `else`, T == float would fall into the final
+  // branch as well and fill the int64 range in addition to the float one.
+  else if (std::is_same<T, int32_t>::value)
+    calculateActivationRange(act, &p.quantized_activation_min, &p.quantized_activation_max);
+  else
+    calculateActivationRange(act, &p.int64_activation_min, &p.int64_activation_max);
+}
+
// Decompose a double multiplier into a Q0.31 int32 representation of its
// significand, and shift representation of its exponent.
//
diff --git a/compiler/luci-interpreter/src/loader/CMakeLists.txt b/compiler/luci-interpreter/src/loader/CMakeLists.txt
index 2cde99f5d..292771592 100644
--- a/compiler/luci-interpreter/src/loader/CMakeLists.txt
+++ b/compiler/luci-interpreter/src/loader/CMakeLists.txt
@@ -17,7 +17,9 @@ endmacro(REGISTER_KERNEL)
include(${KERNEL_REGISTER_FILE})
add_library(${LUCI_INTERPRETER_LOADER} STATIC ${SOURCES})
-set_target_properties(${LUCI_INTERPRETER_LOADER} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_LOADER} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_PAL_DIR}")
target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
diff --git a/compiler/luci-interpreter/src/loader/GraphLoader.cpp b/compiler/luci-interpreter/src/loader/GraphLoader.cpp
index a14442ed5..dba39050c 100644
--- a/compiler/luci-interpreter/src/loader/GraphLoader.cpp
+++ b/compiler/luci-interpreter/src/loader/GraphLoader.cpp
@@ -73,6 +73,26 @@ const void *getNodeData(const luci::CircleConst *node, size_t *data_size)
}
}
+/**
+ * Returns the constant data referenced by a "CircleReferencingConst" custom node
+ *
+ * The node's custom_options are interpreted as a ConstDataReference record (pointer + size).
+ *
+ * @param node custom node to read from
+ * @param data_size receives the size stored in the record; left untouched if nullptr is returned
+ * @return pointer stored in the record, or nullptr if the node is not a CircleReferencingConst
+ * @throws std::runtime_error if custom_options is too small to hold a ConstDataReference
+ */
+const void *getNodeData(const luci::CircleCustom *node, size_t *data_size)
+{
+  if (node->custom_code() != "CircleReferencingConst")
+    return nullptr;
+
+  // helper struct which describes data loaded to custom_options of CircleReferencingConst node
+  // TODO move this struct to header
+  struct ConstDataReference
+  {
+    const uint8_t *data = nullptr;
+    uint32_t size = 0;
+  };
+
+  const auto &custom_options = node->custom_options();
+  // Reinterpreting a shorter buffer would read past its end, so reject it up front.
+  if (custom_options.size() < sizeof(ConstDataReference))
+    throw std::runtime_error("Invalid custom_options of CircleReferencingConst node");
+
+  const auto &const_data_ref = *reinterpret_cast<const ConstDataReference *>(custom_options.data());
+
+  *data_size = const_data_ref.size;
+  return const_data_ref.data;
+}
+
bool isExecutableNode(const luci::CircleNode *node)
{
switch (node->opcode())
@@ -83,12 +103,30 @@ bool isExecutableNode(const luci::CircleNode *node)
case luci::CircleOpcode::CIRCLEOUTPUT:
case luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE:
// The following nodes denote outputs of multiple-output nodes.
+ case luci::CircleOpcode::CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT:
+ case luci::CircleOpcode::CIRCLECUSTOMOUT:
case luci::CircleOpcode::CIRCLEIFOUT:
+ case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT:
+ case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT:
case luci::CircleOpcode::CIRCLESPLITOUT:
case luci::CircleOpcode::CIRCLESPLITVOUT:
+ case luci::CircleOpcode::CIRCLETOPKV2OUT:
+ case luci::CircleOpcode::CIRCLEUNIQUEOUT:
case luci::CircleOpcode::CIRCLEUNPACKOUT:
+ case luci::CircleOpcode::CIRCLEVARIABLE:
case luci::CircleOpcode::CIRCLEWHILEOUT:
return false;
+ // Custom nodes may be executable and non-executable
+ case luci::CircleOpcode::CUSTOM:
+ {
+ auto const custom_node = loco::must_cast<const luci::CircleCustom *>(node);
+
+ // TODO handle more non-executable Custom ops here
+ if (custom_node->custom_code() == "CircleReferencingConst")
+ return false;
+
+ return true;
+ }
default:
return true;
}
@@ -102,15 +140,34 @@ bool isTensorProducingNode(const luci::CircleNode *node)
case luci::CircleOpcode::CIRCLEOUTPUT:
// The following nodes are multiple-output nodes. They do not produce tensors, the tensors
// are produced by the corresponding *Out nodes instead.
+ case luci::CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM:
+ case luci::CircleOpcode::CUSTOM:
case luci::CircleOpcode::IF:
+ case luci::CircleOpcode::NON_MAX_SUPPRESSION_V4:
+ case luci::CircleOpcode::NON_MAX_SUPPRESSION_V5:
case luci::CircleOpcode::SPLIT:
+ case luci::CircleOpcode::SPLIT_V:
+ case luci::CircleOpcode::TOPK_V2:
+ case luci::CircleOpcode::UNIQUE:
case luci::CircleOpcode::UNPACK:
+ case luci::CircleOpcode::WHILE:
return false;
default:
return true;
}
}
+// Tells whether the given Custom node is one of the custom ops recognized here.
+// Only "CircleReferencingConst" is recognized for now.
+bool isSupportedCustomNode(const luci::CircleNode *node)
+{
+  const auto *custom = loco::must_cast<const luci::CircleCustom *>(node);
+
+  // TODO handle more Custom ops here
+  return custom->custom_code() == "CircleReferencingConst";
+}
+
} // namespace
GraphLoader::GraphLoader(
@@ -129,18 +186,25 @@ void GraphLoader::loadTensors()
{
const auto *node = loco::must_cast<const luci::CircleNode *>(_graph->nodes()->at(i));
+ if (node->opcode() == luci::CircleOpcode::CUSTOM && !isSupportedCustomNode(node))
+ throw std::runtime_error("Unknown Custom Node, yet.");
+
if (!isTensorProducingNode(node))
continue;
- // Only Input and Const nodes have shapes. Shapes of intermediate tensors will be inferred.
+ // Only Input, Const, Custom and Variable nodes have shapes. Shapes of intermediate tensors will
+ // be inferred.
Shape shape{};
- if (const auto *input_node = dynamic_cast<const luci::CircleInput *>(node))
+ switch (node->opcode())
{
- shape = getNodeShape(input_node);
- }
- else if (const auto *const_node = dynamic_cast<const luci::CircleConst *>(node))
- {
- shape = getNodeShape(const_node);
+ case luci::CircleOpcode::CIRCLECONST:
+ case luci::CircleOpcode::CIRCLECUSTOMOUT:
+ case luci::CircleOpcode::CIRCLEINPUT:
+ case luci::CircleOpcode::CIRCLEVARIABLE:
+ shape = getNodeShape(node);
+ break;
+ default:
+ break;
}
AffineQuantization quantization;
@@ -175,6 +239,22 @@ void GraphLoader::loadTensors()
tensor->writeData(const_data, data_size);
}
}
+ else if (const auto *custom_out_node = dynamic_cast<const luci::CircleCustomOut *>(node))
+ {
+ const auto *custom_node =
+ loco::must_cast<const luci::CircleCustom *>(custom_out_node->input());
+
+ if (custom_node->custom_code() == "CircleReferencingConst")
+ {
+ size_t data_size{};
+ const void *const_data = getNodeData(custom_node, &data_size);
+ if (const_data != nullptr)
+ {
+ _memory_manager->allocate_memory(*tensor);
+ tensor->writeData(const_data, data_size);
+ }
+ }
+ }
_node_to_tensor.emplace(node, tensor.get());
_runtime_to_ir.tensor_to_node.emplace(tensor.get(), node);
diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp b/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp
index 7a457a62f..b221b6921 100644
--- a/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp
+++ b/compiler/luci-interpreter/src/loader/KernelBuilder.test.cpp
@@ -21,6 +21,7 @@
#include <kernels/Add.h>
#include <kernels/ArgMax.h>
#include <kernels/AveragePool2D.h>
+#include <kernels/BatchMatMul.h>
#include <kernels/Cast.h>
#include <kernels/Concatenation.h>
#include <kernels/Conv2D.h>
@@ -54,6 +55,7 @@
#include <kernels/Mul.h>
#include <kernels/Neg.h>
#include <kernels/NotEqual.h>
+#include <kernels/OneHot.h>
#include <kernels/Pad.h>
#include <kernels/PadV2.h>
#include <kernels/Pow.h>
@@ -209,6 +211,27 @@ TEST_F(KernelBuilderTest, AveragePool2D)
EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
}
+// The built kernel must expose both operands, the output and the adj_x/adj_y flags of the node.
+TEST_F(KernelBuilderTest, BatchMatMul)
+{
+  auto *x_input = createInputNode();
+  auto *y_input = createInputNode();
+
+  auto *op = createNode<luci::CircleBatchMatMul>();
+  op->adj_x(false);
+  op->adj_y(false);
+  op->x(x_input);
+  op->y(y_input);
+
+  auto kernel = buildKernel<kernels::BatchMatMul>(op);
+  ASSERT_THAT(kernel, NotNull());
+
+  checkTensor(kernel->x(), x_input);
+  checkTensor(kernel->y(), y_input);
+  checkTensor(kernel->output(), op);
+  EXPECT_THAT(kernel->params().adj_x, Eq(op->adj_x()));
+  EXPECT_THAT(kernel->params().adj_y, Eq(op->adj_y()));
+}
+
TEST_F(KernelBuilderTest, Cast)
{
auto *input = createInputNode();
@@ -832,6 +855,31 @@ TEST_F(KernelBuilderTest, NotEqual)
checkTensor(kernel->output(), op);
}
+// The built kernel must expose all four inputs and the axis parameter of the node.
+TEST_F(KernelBuilderTest, OneHot)
+{
+  auto *indices_node = createInputNode();
+  auto *depth_node = createInputNode();
+  auto *on_value_node = createInputNode();
+  auto *off_value_node = createInputNode();
+
+  auto *op = createNode<luci::CircleOneHot>();
+  op->axis(1);
+  op->indices(indices_node);
+  op->depth(depth_node);
+  op->on_value(on_value_node);
+  op->off_value(off_value_node);
+
+  auto kernel = buildKernel<kernels::OneHot>(op);
+  ASSERT_THAT(kernel, NotNull());
+
+  checkTensor(kernel->indices(), indices_node);
+  checkTensor(kernel->depth(), depth_node);
+  checkTensor(kernel->on_value(), on_value_node);
+  checkTensor(kernel->off_value(), off_value_node);
+  EXPECT_THAT(kernel->params().axis, Eq(op->axis()));
+}
+
TEST_F(KernelBuilderTest, Pad)
{
auto *input = createInputNode();
diff --git a/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp b/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp
index 5bc37bd4a..efb011257 100644
--- a/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/AveragePool2D.cpp
@@ -17,6 +17,7 @@
#include "Builders.h"
#include "kernels/AveragePool2D.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
namespace luci_interpreter
{
@@ -40,7 +41,26 @@ std::unique_ptr<Kernel> build_kernel_CircleAveragePool2D(const luci::CircleNode
params.stride_width = node->stride()->w();
params.activation = node->fusedActivationFunction();
- return std::make_unique<kernels::AveragePool2D>(input, output, params);
+ // It is unknown what data will be stored in scratchpad tensor,
+ // using UINT8 as a most general option
+ auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
+ scratchpad->set_observable(false);
+ scratchpad->set_data_buffer(nullptr);
+ // If node has execution plan then read memory offsets for scratchpad temporary tensor
+ // from the beginning of shared memory buffer.
+ // Used in Static Memory Manager.
+ // TODO move tensors offset initialization to one place
+ if (luci::has_execution_plan(node))
+ {
+ const auto execution_plan = luci::get_execution_plan(node);
+ // Check whether the offset for the current CircleAveragePool2D temporary was found.
+ if (execution_plan.offsets().size() > 1)
+ // If this is true, then we keep this offset in scratchpad.
+ scratchpad->set_offset(execution_plan.offsets().at(1));
+ }
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
+
+ return std::make_unique<kernels::AveragePool2D>(input, output, tmp, params);
}
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp b/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp
new file mode 100644
index 000000000..aae3dbab1
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/BatchMatMul.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/BatchMatMul.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+
+namespace luci_interpreter
+{
+
+// Creates the BatchMatMul kernel for the given node together with two scratchpad tensors
+// (one per operand) that are added to the runtime graph.
+// Throws std::runtime_error if circle_node is not a CircleBatchMatMul.
+std::unique_ptr<Kernel> build_kernel_CircleBatchMatMul(const luci::CircleNode *circle_node,
+                                                       KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleBatchMatMul *>(circle_node);
+  if (!node)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *x = helper.getInputTensor(node->x());
+  const Tensor *y = helper.getInputTensor(node->y());
+  Tensor *out = helper.getOutputTensor(node);
+
+  // Each scratchpad takes the element type of its operand, is marked non-observable
+  // and starts without a data buffer.
+  auto x_scratch =
+    std::make_unique<Tensor>(x->element_type(), Shape({}), AffineQuantization{}, "");
+  x_scratch->set_observable(false);
+  x_scratch->set_data_buffer(nullptr);
+  auto y_scratch =
+    std::make_unique<Tensor>(y->element_type(), Shape({}), AffineQuantization{}, "");
+  y_scratch->set_observable(false);
+  y_scratch->set_data_buffer(nullptr);
+
+  // If node has execution plan then read memory offsets for scratchpad temporary tensors
+  // from the beginning of shared memory buffer.
+  // Used in Static Memory Manager.
+  // TODO move tensors offset initialization to one place
+  if (luci::has_execution_plan(node))
+  {
+    const auto plan = luci::get_execution_plan(node);
+    const auto &offsets = plan.offsets();
+    // More than one entry means offsets for the BatchMatMul temporaries were found.
+    if (offsets.size() > 1)
+    {
+      assert(offsets.size() == 3);
+
+      x_scratch->set_offset(offsets.at(1));
+      y_scratch->set_offset(offsets.at(2));
+    }
+  }
+  Tensor *x_tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(x_scratch));
+  Tensor *y_tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(y_scratch));
+
+  BatchMatMulParams params;
+  params.adj_x = node->adj_x();
+  params.adj_y = node->adj_y();
+
+  return std::make_unique<kernels::BatchMatMul>(x, y, out, x_tmp, y_tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp
index 22fd1aca4..b48d97d19 100644
--- a/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/Conv2D.cpp
@@ -35,11 +35,12 @@ std::unique_ptr<Kernel> build_kernel_CircleConv2D(const luci::CircleNode *circle
const Tensor *bias = helper.getOptionalInputTensor(node->bias());
Tensor *output = helper.getOutputTensor(node);
- auto im2col =
- std::make_unique<Tensor>(input->element_type(), Shape({}), AffineQuantization{}, "");
- im2col->set_observable(false);
- im2col->set_data_buffer(nullptr);
- // If node has execution plan then read memory offsets for im2col temporary tensor
+ // It is unknown what data will be stored in scratchpad tensor,
+ // using UINT8 as a most general option
+ auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
+ scratchpad->set_observable(false);
+ scratchpad->set_data_buffer(nullptr);
+ // If node has execution plan then read memory offsets for scratchpad temporary tensor
// from the beginning of shared memory buffer.
// Used in Static Memory Manager.
// TODO move tensors offset initialization to one place
@@ -48,10 +49,10 @@ std::unique_ptr<Kernel> build_kernel_CircleConv2D(const luci::CircleNode *circle
const auto execution_plan = luci::get_execution_plan(node);
// Check whether the offset for the current CircleConv2D temporary was found.
if (execution_plan.offsets().size() > 1)
- // If this is true, then we keep this offset in im2col.
- im2col->set_offset(execution_plan.offsets().at(1));
+ // If this is true, then we keep this offset in scratchpad.
+ scratchpad->set_offset(execution_plan.offsets().at(1));
}
- Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(im2col));
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
Conv2DParams params{};
params.padding = node->padding();
diff --git a/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
index c2f0346a2..db26ecf2e 100644
--- a/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
@@ -17,6 +17,7 @@
#include "Builders.h"
#include "kernels/DepthwiseConv2D.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
namespace luci_interpreter
{
@@ -43,7 +44,26 @@ std::unique_ptr<Kernel> build_kernel_CircleDepthwiseConv2D(const luci::CircleNod
params.dilation_width_factor = node->dilation()->w();
params.activation = node->fusedActivationFunction();
- return std::make_unique<kernels::DepthwiseConv2D>(input, filter, bias, output, params);
+ // It is unknown what data will be stored in scratchpad tensor,
+ // using UINT8 as a most general option
+ auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
+ scratchpad->set_observable(false);
+ scratchpad->set_data_buffer(nullptr);
+ // If node has execution plan then read memory offsets for scratchpad temporary tensor
+ // from the beginning of shared memory buffer.
+ // Used in Static Memory Manager.
+ // TODO move tensors offset initialization to one place
+ if (luci::has_execution_plan(node))
+ {
+ const auto execution_plan = luci::get_execution_plan(node);
+ // Check whether the offset for the current CircleDepthwiseConv2D temporary was found.
+ if (execution_plan.offsets().size() > 1)
+ // If this is true, then we keep this offset in scratchpad.
+ scratchpad->set_offset(execution_plan.offsets().at(1));
+ }
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
+
+ return std::make_unique<kernels::DepthwiseConv2D>(input, filter, bias, output, tmp, params);
}
} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp b/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp
new file mode 100644
index 000000000..4aae56469
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Dequantize.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Dequantize.h"
+
+namespace luci_interpreter
+{
+
+// Builds the Dequantize kernel from the node's single input and its output tensor.
+// Throws std::runtime_error if circle_node is not a CircleDequantize.
+std::unique_ptr<Kernel> build_kernel_CircleDequantize(const luci::CircleNode *circle_node,
+                                                      KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleDequantize *>(circle_node);
+  if (!node)
+    throw std::runtime_error("wrong builder for operation");
+
+  const Tensor *src = helper.getInputTensor(node->input());
+  Tensor *dst = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::Dequantize>(src, dst);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/ExpandDims.cpp b/compiler/luci-interpreter/src/loader/nodes/ExpandDims.cpp
new file mode 100644
index 000000000..9840c34e5
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/ExpandDims.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ExpandDims.h"
+
+namespace luci_interpreter
+{
+
+// Builds the ExpandDims kernel from its two inputs (data and axis) and its output tensor.
+std::unique_ptr<Kernel> build_kernel_CircleExpandDims(const luci::CircleNode *circle_node,
+                                                      KernelBuilderHelper &helper)
+{
+  const auto *expand_dims = loco::must_cast<const luci::CircleExpandDims *>(circle_node);
+  assert(expand_dims->arity() == 2);
+
+  const Tensor *data = helper.getInputTensor(expand_dims->input());
+  const Tensor *axis_tensor = helper.getInputTensor(expand_dims->axis());
+  Tensor *result = helper.getOutputTensor(expand_dims);
+
+  return std::make_unique<kernels::ExpandDims>(data, axis_tensor, result);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp b/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp
index 2917598fc..0b8ac44bd 100644
--- a/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp
+++ b/compiler/luci-interpreter/src/loader/nodes/FullyConnected.cpp
@@ -36,6 +36,7 @@ std::unique_ptr<Kernel> build_kernel_CircleFullyConnected(const luci::CircleNode
FullyConnectedParams params{};
params.activation = node->fusedActivationFunction();
+ params.keep_num_dims = node->keep_num_dims();
return std::make_unique<kernels::FullyConnected>(input, weights, bias, output, params);
}
diff --git a/compiler/luci-interpreter/src/loader/nodes/Gather.cpp b/compiler/luci-interpreter/src/loader/nodes/Gather.cpp
new file mode 100644
index 000000000..9df9775c5
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Gather.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Gather.h"
+
+namespace luci_interpreter
+{
+
+// Builds the Gather kernel from the params and indices inputs and the output tensor.
+// batch_dims is always passed as 0 because it is not supported yet.
+// Throws std::runtime_error if circle_node is not a CircleGather.
+std::unique_ptr<Kernel> build_kernel_CircleGather(const luci::CircleNode *circle_node,
+                                                  KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleGather *>(circle_node);
+  if (!node)
+    throw std::runtime_error("wrong builder for operation");
+  assert(node->arity() == 2);
+
+  const Tensor *source = helper.getInputTensor(node->params());
+  const Tensor *index_tensor = helper.getInputTensor(node->indices());
+  Tensor *result = helper.getOutputTensor(node);
+
+  GatherParams gather_params{};
+  // TODO support batch_dims
+  gather_params.batch_dims = 0;
+  gather_params.axis = node->axis();
+
+  return std::make_unique<kernels::Gather>(source, index_tensor, result, gather_params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/OneHot.cpp b/compiler/luci-interpreter/src/loader/nodes/OneHot.cpp
new file mode 100644
index 000000000..a40160945
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/OneHot.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/OneHot.h"
+
+namespace luci_interpreter
+{
+
+// Builds the OneHot kernel from its four inputs (indices, depth, on_value, off_value),
+// its output tensor and the axis attribute of the node.
+std::unique_ptr<Kernel> build_kernel_CircleOneHot(const luci::CircleNode *circle_node,
+                                                  KernelBuilderHelper &helper)
+{
+  const auto *one_hot = loco::must_cast<const luci::CircleOneHot *>(circle_node);
+  assert(one_hot->arity() == 4);
+
+  const Tensor *indices = helper.getInputTensor(one_hot->indices());
+  const Tensor *depth = helper.getInputTensor(one_hot->depth());
+  const Tensor *on_value = helper.getInputTensor(one_hot->on_value());
+  const Tensor *off_value = helper.getInputTensor(one_hot->off_value());
+  Tensor *result = helper.getOutputTensor(one_hot);
+
+  OneHotParams one_hot_params{};
+  one_hot_params.axis = one_hot->axis();
+
+  return std::make_unique<kernels::OneHot>(indices, depth, on_value, off_value, result,
+                                           one_hot_params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp b/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp
new file mode 100644
index 000000000..fd9836345
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/Quantize.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Quantize.h"
+
+namespace luci_interpreter
+{
+
+// Builds the Quantize kernel from the node's single input and its output tensor.
+// Throws std::runtime_error if circle_node is not a CircleQuantize.
+std::unique_ptr<Kernel> build_kernel_CircleQuantize(const luci::CircleNode *circle_node,
+                                                    KernelBuilderHelper &helper)
+{
+  const auto *node = dynamic_cast<const luci::CircleQuantize *>(circle_node);
+  if (!node)
+    throw std::runtime_error("wrong builder for operation");
+
+  const Tensor *src = helper.getInputTensor(node->input());
+  Tensor *dst = helper.getOutputTensor(node);
+
+  return std::make_unique<kernels::Quantize>(src, dst);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp b/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp
new file mode 100644
index 000000000..89528d5ee
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/nodes/SVDF.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SVDF.h"
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Builds the SVDF kernel for circle_node.
+ *
+ * Seven scratchpad tensors are added to the node's runtime graph and passed to the kernel:
+ *  - one with the element type of the input activation state,
+ *  - one that is S32 when the input is S8 and FLOAT32 otherwise,
+ *  - one of that same type, except that FLOAT32 gives way to an S8/U8 feature-weight type,
+ *  - four that are always FLOAT32.
+ *
+ * @param circle_node Node to build from; must be a luci::CircleSVDF.
+ * @param helper Used to resolve the node's tensors and its runtime graph.
+ * @throws std::runtime_error if circle_node is not a luci::CircleSVDF.
+ */
+std::unique_ptr<Kernel> build_kernel_CircleSVDF(const luci::CircleNode *circle_node,
+                                                KernelBuilderHelper &helper)
+{
+  const auto *svdf_node = dynamic_cast<const luci::CircleSVDF *>(circle_node);
+  if (!svdf_node)
+    throw std::runtime_error("wrong builder for operation");
+
+  const Tensor *input = helper.getInputTensor(svdf_node->input());
+  const Tensor *weight_feature = helper.getInputTensor(svdf_node->weight_feature());
+  const Tensor *weight_time = helper.getInputTensor(svdf_node->weight_time());
+  const Tensor *bias = helper.getOptionalInputTensor(svdf_node->bias());
+  const Tensor *act_state = helper.getInputTensor(svdf_node->input_activation_state());
+  Tensor *output = helper.getOutputTensor(svdf_node);
+
+  // All scratchpads are set up alike: empty shape, default quantization, empty name, marked
+  // non-observable, null data buffer; ownership then moves to the node's runtime graph.
+  const auto add_scratchpad = [&helper, svdf_node](DataType element_type) -> Tensor * {
+    auto scratchpad = std::make_unique<Tensor>(element_type, Shape({}), AffineQuantization{}, "");
+    scratchpad->set_observable(false);
+    scratchpad->set_data_buffer(nullptr);
+    return helper.getRuntimeGraph(svdf_node->graph())->addTensor(std::move(scratchpad));
+  };
+
+  // Created first, with the element type of the activation state input.
+  Tensor *scratch_state = add_scratchpad(act_state->element_type());
+
+  const bool input_is_s8 = input->element_type() == DataType::S8;
+  const DataType type_1 = input_is_s8 ? DataType::S32 : DataType::FLOAT32;
+  Tensor *scratch_1 = add_scratchpad(type_1);
+
+  // Only a FLOAT32 choice can be overridden, and only by S8 or U8 feature weights.
+  DataType type_2 = type_1;
+  if (type_1 == DataType::FLOAT32)
+  {
+    const DataType feature_type = weight_feature->element_type();
+    if (feature_type == DataType::S8 || feature_type == DataType::U8)
+      type_2 = feature_type;
+  }
+  Tensor *scratch_2 = add_scratchpad(type_2);
+
+  // The remaining four scratchpads are FLOAT32 regardless of the input types.
+  Tensor *scratch_3 = add_scratchpad(DataType::FLOAT32);
+  Tensor *scratch_4 = add_scratchpad(DataType::FLOAT32);
+  Tensor *scratch_5 = add_scratchpad(DataType::FLOAT32);
+  Tensor *scratch_6 = add_scratchpad(DataType::FLOAT32);
+
+  SVDFParams params{};
+  params.activation = svdf_node->fusedActivationFunction();
+  params.svdf_rank = svdf_node->svdf_rank();
+  params.asymmetric_quantize_inputs = svdf_node->asymmetric_quantize_inputs();
+
+  return std::make_unique<kernels::SVDF>(input, weight_feature, weight_time, bias, act_state,
+                                         output, scratch_state, scratch_1, scratch_2, scratch_3,
+                                         scratch_4, scratch_5, scratch_6, params);
+}
+
+} // namespace luci_interpreter