Diffstat (limited to 'compiler/luci-micro/luci-interpreter')
-rw-r--r-- compiler/luci-micro/luci-interpreter/CMakeLists.txt | 15
-rw-r--r-- compiler/luci-micro/luci-interpreter/README.md | 158
-rw-r--r-- compiler/luci-micro/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h | 144
-rw-r--r-- compiler/luci-micro/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h | 35
-rw-r--r-- compiler/luci-micro/luci-interpreter/include/luci_interpreter/Interpreter.h | 84
-rw-r--r-- compiler/luci-micro/luci-interpreter/include/luci_interpreter/MemoryManager.h | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h | 34
-rw-r--r-- compiler/luci-micro/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h | 45
-rw-r--r-- compiler/luci-micro/luci-interpreter/include/luci_interpreter/TestMemoryManager.h | 47
-rw-r--r-- compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/DataType.h | 36
-rw-r--r-- compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h | 186
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst | 62
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALArgMax.h | 33
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h | 124
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h | 199
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h | 35
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h | 192
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDequantize.h | 44
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALElu.h | 33
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALFullyConnected.h | 114
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Normalize.h | 34
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h | 33
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h | 32
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALMul.h | 45
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALNeg.h | 32
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALQuantize.h | 44
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSVDF.h | 190
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSoftmax.h | 78
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h | 38
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h | 35
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSub.h | 35
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/cmsisnn/pal.cmake | 65
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/KernelsToBuild.lst | 77
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALArgMax.h | 33
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALAveragePool2d.h | 73
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALBatchMatMul.h | 67
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALBatchToSpaceND.h | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALConv2d.h | 127
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALDepthToSpace.h | 35
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALDepthwiseConv2d.h | 91
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALDequantize.h | 34
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALElu.h | 31
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALFullyConnected.h | 61
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALGather.h | 35
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALL2Normalize.h | 34
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALL2Pool2D.h | 33
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALLeakyRelu.h | 32
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALLocalResponseNormalization.h | 34
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALLogSoftmax.h | 47
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALMul.h | 55
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALNeg.h | 32
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALQuantize.h | 44
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALRelu.h | 39
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALRelu6.h | 39
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALResizeBilinear.h | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALSVDF.h | 90
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALSlice.h | 33
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALSoftmax.h | 47
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToBatchND.h | 38
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToDepth.h | 35
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALSplit.h | 33
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/PALSub.h | 35
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/linux/pal.cmake | 82
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst | 62
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/mcu/PALArgMax.h | 33
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/mcu/PALAveragePool2d.h | 73
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/mcu/PALBatchToSpaceND.h | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/mcu/PALConv2d.h | 85
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthToSpace.h | 35
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h | 91
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/mcu/PALDequantize.h | 44
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/mcu/PALElu.h | 33
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/mcu/PALFullyConnected.h | 61
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Normalize.h | 34
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Pool2D.h | 33
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/mcu/PALLeakyRelu.h | 32
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/mcu/PALMul.h | 45
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/mcu/PALNeg.h | 32
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/mcu/PALQuantize.h | 44
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeBilinear.h | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/mcu/PALSVDF.h | 258
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/mcu/PALSoftmax.h | 62
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToBatchND.h | 38
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToDepth.h | 35
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/mcu/PALSub.h | 35
-rw-r--r-- compiler/luci-micro/luci-interpreter/pal/mcu/pal.cmake | 56
-rw-r--r-- compiler/luci-micro/luci-interpreter/requires.cmake | 1
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.cpp | 96
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.test.cpp | 69
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/CMakeLists.txt | 61
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/Interpreter.cpp | 145
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/SimpleMemoryManager.cpp | 51
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/StaticMemoryManager.cpp | 39
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/TestMemoryManager.cpp | 45
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/core/CMakeLists.txt | 19
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/core/EventNotifier.h | 36
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/core/Kernel.h | 75
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/core/KernelParams.h | 228
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.cpp | 201
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.h | 71
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/core/RuntimeModule.h | 60
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/core/Tensor.cpp | 58
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/import/CMakeLists.txt | 15
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/import/GraphBuilderRegistry.cpp | 33
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp | 113
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.h | 39
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Add.cpp | 220
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Add.h | 50
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Add.test.cpp | 357
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.cpp | 139
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.h | 44
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.test.cpp | 122
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.cpp | 194
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.h | 54
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.test.cpp | 283
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.cpp | 188
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.h | 49
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.test.cpp | 272
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp | 104
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.h | 45
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp | 100
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/BinaryOpCommon.h | 73
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/CMakeLists.txt | 43
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Cast.cpp | 143
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Cast.h | 43
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Cast.test.cpp | 241
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.cpp | 149
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.h | 48
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.test.cpp | 268
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.cpp | 456
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.h | 59
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.test.cpp | 707
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.cpp | 80
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.h | 45
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.test.cpp | 115
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp | 451
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.h | 57
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp | 622
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.cpp | 79
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.h | 43
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.test.cpp | 149
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Div.cpp | 152
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Div.h | 49
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Div.test.cpp | 230
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Elu.cpp | 52
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Elu.h | 43
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Elu.test.cpp | 81
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Equal.cpp | 142
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Equal.h | 54
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Equal.test.cpp | 306
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Exp.cpp | 56
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Exp.h | 46
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Exp.test.cpp | 55
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.cpp | 88
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.h | 44
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp | 115
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Fill.cpp | 117
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Fill.h | 47
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Fill.test.cpp | 169
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Floor.cpp | 57
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Floor.h | 45
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Floor.test.cpp | 76
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.cpp | 85
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.h | 46
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.test.cpp | 147
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.cpp | 192
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.h | 51
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp | 260
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Gather.cpp | 139
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Gather.h | 47
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Gather.test.cpp | 137
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Greater.cpp | 142
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Greater.h | 54
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Greater.test.cpp | 334
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.cpp | 145
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.h | 54
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.test.cpp | 333
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/If.cpp | 94
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/If.h | 49
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/If.test.cpp | 161
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.cpp | 121
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.h | 49
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.test.cpp | 97
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.cpp | 75
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.h | 46
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.test.cpp | 126
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.cpp | 88
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.h | 49
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.test.cpp | 291
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.cpp | 90
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.h | 53
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.test.cpp | 127
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Less.cpp | 142
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Less.h | 54
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Less.test.cpp | 334
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.cpp | 142
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.h | 54
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.test.cpp | 334
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp | 65
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.h | 44
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp | 157
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.cpp | 92
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.h | 48
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.test.cpp | 124
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.cpp | 62
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.h | 47
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.test.cpp | 101
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.cpp | 60
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.h | 46
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.test.cpp | 78
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.cpp | 49
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.h | 44
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.test.cpp | 104
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Logistic.cpp | 94
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Logistic.h | 52
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Logistic.test.cpp | 148
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.cpp | 150
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.h | 52
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp | 139
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Maximum.cpp | 65
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Maximum.h | 47
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Maximum.test.cpp | 82
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Mean.cpp | 346
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Mean.h | 55
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Mean.test.cpp | 240
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Minimum.cpp | 65
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Minimum.h | 47
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Minimum.test.cpp | 82
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.cpp | 172
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.h | 45
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.test.cpp | 225
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Mul.cpp | 150
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Mul.h | 52
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Mul.test.cpp | 292
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Neg.cpp | 58
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Neg.h | 46
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Neg.test.cpp | 71
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.cpp | 142
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.h | 54
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.test.cpp | 306
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/OneHot.cpp | 136
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/OneHot.h | 48
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/OneHot.test.cpp | 192
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/PRelu.cpp | 211
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/PRelu.h | 59
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/PRelu.test.cpp | 397
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Pack.cpp | 142
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Pack.h | 46
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Pack.test.cpp | 163
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Pad.cpp | 114
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Pad.h | 43
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Pad.test.cpp | 109
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/PadV2.cpp | 108
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/PadV2.h | 44
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/PadV2.test.cpp | 90
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Pow.cpp | 79
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Pow.h | 46
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Pow.test.cpp | 140
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Quantize.cpp | 160
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Quantize.h | 43
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Quantize.test.cpp | 254
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Relu.cpp | 114
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Relu.h | 51
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Relu.test.cpp | 168
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Relu6.cpp | 88
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Relu6.h | 50
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Relu6.test.cpp | 149
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Reshape.cpp | 90
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Reshape.h | 43
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Reshape.test.cpp | 82
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp | 74
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.h | 45
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.test.cpp | 255
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp | 74
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.h | 45
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp | 231
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.cpp | 81
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.h | 43
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.test.cpp | 71
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.cpp | 66
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.h | 46
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.test.cpp | 90
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/SVDF.cpp | 241
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/SVDF.h | 56
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/SVDF.test.cpp | 341
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Shape.cpp | 70
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Shape.h | 46
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Shape.test.cpp | 89
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Slice.cpp | 153
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Slice.h | 44
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Slice.test.cpp | 70
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Softmax.cpp | 92
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Softmax.h | 49
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Softmax.test.cpp | 117
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp | 103
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.h | 45
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp | 123
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp | 79
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.h | 45
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.test.cpp | 65
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Split.cpp | 81
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Split.h | 47
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Split.test.cpp | 129
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/SplitV.cpp | 111
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/SplitV.h | 49
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/SplitV.test.cpp | 112
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.cpp | 66
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.h | 46
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.test.cpp | 90
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Square.cpp | 66
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Square.h | 46
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Square.test.cpp | 52
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.cpp | 64
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.h | 47
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.test.cpp | 78
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.cpp | 86
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.h | 44
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.test.cpp | 74
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.cpp | 150
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.h | 47
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.test.cpp | 112
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Sub.cpp | 164
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Sub.h | 49
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Sub.test.cpp | 266
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Tanh.cpp | 93
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Tanh.h | 52
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Tanh.test.cpp | 164
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.cpp | 128
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.h | 296
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Transpose.cpp | 84
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Transpose.h | 44
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Transpose.test.cpp | 115
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.cpp | 351
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.h | 65
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.test.cpp | 353
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Unpack.cpp | 84
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Unpack.h | 46
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Unpack.test.cpp | 148
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Utils.cpp | 198
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/Utils.h | 293
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/While.cpp | 116
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/While.h | 48
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/kernels/While.test.cpp | 101
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/CMakeLists.txt | 39
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.cpp | 344
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.h | 55
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.cpp | 104
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.h | 52
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.test.cpp | 1376
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.cpp | 64
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.h | 84
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.cpp | 53
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.h | 52
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/RuntimeToIR.h | 38
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Add.cpp | 40
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/ArgMax.cpp | 39
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/AveragePool2D.cpp | 64
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchMatMul.cpp | 70
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp | 38
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Builders.h | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Cast.cpp | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Concatenation.cpp | 42
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Conv2D.cpp | 66
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthToSpace.cpp | 39
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp | 67
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Dequantize.cpp | 35
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Div.cpp | 39
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Elu.cpp | 35
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Equal.cpp | 38
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Exp.cpp | 36
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/ExpandDims.cpp | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Fill.cpp | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Floor.cpp | 36
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/FloorDiv.cpp | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/FullyConnected.cpp | 42
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Gather.cpp | 42
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Greater.cpp | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/GreaterEqual.cpp | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/If.cpp | 47
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/InstanceNorm.cpp | 43
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Normalize.cpp | 39
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Pool2D.cpp | 44
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/LeakyRelu.cpp | 38
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Less.cpp | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/LessEqual.cpp | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp | 42
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/LogSoftmax.cpp | 36
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalAnd.cpp | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalNot.cpp | 36
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalOr.cpp | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Logistic.cpp | 36
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/MaxPool2D.cpp | 44
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Maximum.cpp | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Mean.cpp | 61
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Minimum.cpp | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/MirrorPad.cpp | 40
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Mul.cpp | 40
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Neg.cpp | 36
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/NotEqual.cpp | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/OneHot.cpp | 42
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/PRelu.cpp | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Pack.cpp | 44
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Pad.cpp | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/PadV2.cpp | 38
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Pow.cpp | 38
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Quantize.cpp | 36
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu.cpp | 36
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu6.cpp | 36
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Reshape.cpp | 38
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp | 41
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp | 46
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/ReverseV2.cpp | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Rsqrt.cpp | 36
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/SVDF.cpp | 92
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Shape.cpp | 39
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Slice.cpp | 39
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Softmax.cpp | 39
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp | 39
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp | 39
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Split.cpp | 40
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/SplitV.cpp | 41
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Sqrt.cpp | 36
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Square.cpp | 36
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/SquaredDifference.cpp | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Squeeze.cpp | 39
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/StridedSlice.cpp | 47
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Sub.cpp | 40
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Tanh.cpp | 36
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Transpose.cpp | 37
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/TransposeConv.cpp | 55
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/Unpack.cpp | 42
-rw-r--r-- compiler/luci-micro/luci-interpreter/src/loader/nodes/While.cpp | 47
437 files changed, 40783 insertions, 0 deletions
diff --git a/compiler/luci-micro/luci-interpreter/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/CMakeLists.txt
new file mode 100644
index 000000000..1f7acee87
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/CMakeLists.txt
@@ -0,0 +1,15 @@
+set(LUCI_INTERPRETER_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include")
+set(LUCI_INTERPRETER_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src")
+if (NOT LUCI_INTERPRETER_PAL_DIR)
+ set(LUCI_INTERPRETER_PAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}/pal/linux")
+endif()
+
+set(KERNEL_REGISTER_FILE ${LUCI_INTERPRETER_PAL_DIR}/KernelsToBuild.lst)
+
+if (NOT DEFINED CUSTOM_LUCI_INTERPRETER_SUFFIX)
+ set(LUCI_INTERPRETER_SUFFIX "")
+else()
+ set(LUCI_INTERPRETER_SUFFIX ${CUSTOM_LUCI_INTERPRETER_SUFFIX})
+endif()
+
+add_subdirectory(src)
diff --git a/compiler/luci-micro/luci-interpreter/README.md b/compiler/luci-micro/luci-interpreter/README.md
new file mode 100644
index 000000000..77ec5c81c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/README.md
@@ -0,0 +1,158 @@
+# luci-interpreter
+
+`luci-interpreter` is an inference engine for neural networks represented in luci IR.
+See the `compiler/luci/lang` directory for details about the IR.
+Related infrastructure, such as the importer/exporter and optimizations, can be found in `compiler/luci`.
+
+`luci-interpreter` provides:
+- Basic inference functionality, input setters and output getters
+- Interface for inspecting hidden interpreter state, like activation values during inference
+- Customization mechanisms to fit the interpreter to specific platforms, like MCUs
+
+Public interface headers are placed in the `luci-interpreter/include/luci_interpreter` directory.
+
+## Basic usage
+
+Minimal usage includes:
+- Setting input data
+- Running inference
+- Fetching inference results
+
+The interpreter object is reusable and can run multiple inferences.
+Elements of tensors (input/output/internal) are stored contiguously in a C-like (row-major) layout:
+this means that for a tensor `t = [[0, 1], [2, 3]]`, `t[0, 1] == 1`.
+
+Input and output tensors have the same indices as in the original luci model.
+
+**Usage example:**
+``` c++
+// Note: getTensorSize computes a tensor's size in bytes; it is not part of the
+// interpreter and must be implemented by the user (see the sketch below)
+
+luci_interpreter::Interpreter interpreter(module);
+
+// Set inputs
+// assuming the model has only one input and one output
+const auto input_nodes = loco::input_nodes(module->graph());
+
+const auto *input_node = dynamic_cast<const luci::CircleInput *>(input_nodes[0]);
+std::vector<char> input_data(getTensorSize(input_node));
+// Initialize input data here
+
+interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
+
+// Start inference
+interpreter.interpret();
+
+// Fetch inference results
+const auto output_nodes = loco::output_nodes(module->graph());
+const auto *output_node = dynamic_cast<const luci::CircleOutput *>(output_nodes[0]);
+std::vector<char> output_data(getTensorSize(output_node));
+interpreter.readOutputTensor(output_node, output_data.data(), output_data.size());
+```
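+
+A possible `getTensorSize` implementation (a sketch under assumptions, not part of the
+interpreter API: it assumes the node's shape is static and relies on the `dtype`/`rank`/`dim`
+accessors of `luci::CircleNode` and on `loco::size`):
+
+``` c++
+// Hypothetical user-side helper: computes the byte size of a node's tensor.
+size_t getTensorSize(const luci::CircleNode *node)
+{
+  size_t size = loco::size(node->dtype()); // size of one element in bytes
+  for (uint32_t i = 0; i < node->rank(); ++i)
+    size *= node->dim(i).value(); // multiply by each static dimension
+  return size;
+}
+```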
+
+## Inspecting intermediate state
+
+The interpreter provides interfaces for inspecting its internal state during inference.
+
+This is done via an "observer" mechanism:
+- The `Interpreter` class has an `attachObserver` method, which takes a pointer to an `ExecutionObserver` object
+- `ExecutionObserver` defines several callback methods the user can override to inject custom code
+
+`ExecutionObserver` provides three callbacks:
+- `postTensorWrite` is called after an operation has written an output tensor, so its contents can be inspected
+- `preOperatorExecute` notifies that the interpreter is about to execute an operation
+- `postOperatorExecute` notifies that the interpreter has finished executing an operation
+
+See `luci-interpreter/include/luci_interpreter/Interpreter.h` for the details of this interface.
+
+**Usage example:**
+``` c++
+class CustomExecutionObserver: public luci_interpreter::ExecutionObserver
+{
+public:
+ void postTensorWrite(const luci::CircleNode *node, const Tensor *tensor) override
+ {
+ if (tensor->element_type() != loco::DataType::FLOAT32)
+ return;
+ for (int i = 0; i < tensor->shape().num_elements(); ++i)
+      std::cout << tensor->data<float>()[i] << ", ";
+ }
+
+  // A user observer may override only the methods it needs;
+  // the others inherit the empty implementations from the base observer.
+
+ // void preOperatorExecute(const luci::CircleNode *node);
+ // void postOperatorExecute(const luci::CircleNode *node);
+};
+
+luci_interpreter::Interpreter interpreter(module);
+CustomExecutionObserver observer;
+interpreter.attachObserver(&observer);
+
+// initialize input_data
+interpreter.writeInputTensor(input_node, input_data.data(), input_data.size());
+
+interpreter.interpret();
+```
+
+## Customizing inference
+
+### Memory manager
+
+The interpreter provides a hook for replacing the default memory management mechanism.
+
+This is done via the `IMemoryManager` interface; see `luci-interpreter/include/luci_interpreter/MemoryManager.h` for its declaration.
+
+This header declares the `IMemoryManager` abstract class, which is responsible for the allocation and deallocation of tensor memory.
+
+A user can construct an interpreter with one of the predefined memory managers or with a custom memory manager.
+Note that one memory manager can be shared between multiple interpreter instances, because an interpreter does not own the manager object.
+
+List of predefined memory managers:
+- `SimpleMemoryManager` The default; a simple wrapper around new/delete.
+- `TestMemoryManager` Remembers all allocated memory and releases it in the manager's destructor; used in kernel unit tests.
+- `BuddyMemoryManager` Implements the buddy algorithm and uses an external buffer for tensor data allocations; does not need new/delete.
+- `StaticMemoryManager` Uses a precomputed memory allocation plan. Requires preparation with a MemoryPlanner, but can reduce memory consumption in restricted environments (such as MCUs).
+
+**SimpleMemoryManager usage example:**
+
+This is the default memory manager, so nothing needs to be selected to use it.
+``` c++
+luci_interpreter::Interpreter interpreter(module);
+```
+
+**TestMemoryManager usage example:**
+
+``` c++
+luci_interpreter::TestMemoryManager mm;
+luci_interpreter::Interpreter interpreter(module, &mm);
+```
+
+**BuddyMemoryManager usage example:**
+
+`BuddyMemoryManager` implements a classic allocation algorithm: https://en.wikipedia.org/wiki/Buddy_memory_allocation.
+
+This allocator uses an external buffer as a memory pool, which makes it possible to use static memory arrays for allocations.
+
+Limitations:
+- The current implementation uses only the largest power-of-two portion of the given buffer.
+
+  For example, of a 1000-byte buffer only the lower 512 bytes will be used.
+- The current implementation can handle a memory pool of at most 4 gigabytes.
+
+``` c++
+ constexpr int buffer_size = 2048;
+ static uint8_t buffer[buffer_size];
+ luci_interpreter::BuddyMemoryManager memory_manager(buffer, buffer_size);
+ luci_interpreter::Interpreter interpreter(module.get(), &memory_manager);
+```
+
+**StaticMemoryManager usage example:**
+``` c++
+TBD when it is merged
+```
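+
+Until that example is merged, here is a minimal sketch (an assumption based only on the
+constructor declared in `StaticMemoryManager.h`; the buffer must be large enough for the
+precomputed allocation plan stored in the model):
+
+``` c++
+// Hypothetical buffer size; the real memory plan dictates the required capacity.
+static uint8_t buffer[4 * 1024];
+luci_interpreter::StaticMemoryManager memory_manager(buffer);
+luci_interpreter::Interpreter interpreter(module, &memory_manager);
+```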
+
+## Further reading
+
+If you want to participate in development, please read `DEVELOPER.md` for SW architecture details.
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h
new file mode 100644
index 000000000..205baa626
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/BuddyMemoryManager.h
@@ -0,0 +1,144 @@
+/* Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/MemoryManager.h"
+
+#ifndef LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
+
+namespace luci_interpreter
+{
+
+class BuddyMemoryManager : public IMemoryManager
+{
+public:
+ BuddyMemoryManager(uint8_t *memory_start, int32_t memSize);
+
+ void allocate_memory(luci_interpreter::Tensor &tensor) final;
+ void release_memory(luci_interpreter::Tensor &tensor) final;
+
+private:
+ struct Block
+ {
+ Block *next_free;
+ bool is_free;
+ uint32_t size;
+ // debug field
+ Block *self;
+ };
+
+ Block *_start_block;
+ int32_t _num_blocks;
+ uint32_t _size;
+ Block *_free_blocks[32]{};
+
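+  // Returns floor(log2(val)), i.e. the index of the highest set bit (0 when val <= 1).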
+ static int32_t lowerLog2(uint32_t val)
+ {
+ int32_t i = 0;
+ while (val >>= 1)
+ i++;
+
+ return i;
+ }
+
+ void addToBlocks(Block *block, int32_t l)
+ {
+ if (!block)
+ return;
+
+ block->next_free = _free_blocks[l];
+ _free_blocks[l] = block;
+ }
+
+ void removeFromBlocks(const Block *block, int32_t l)
+ {
+ if (!block)
+ return;
+
+ Block *tmp = _free_blocks[l];
+
+ if (block == tmp)
+ {
+ _free_blocks[l] = block->next_free;
+ return;
+ }
+
+ while (tmp)
+ {
+ if (tmp->next_free == block)
+ {
+ tmp->next_free = block->next_free;
+ return;
+ }
+
+ tmp = tmp->next_free;
+ }
+ }
+
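+  // Splits a free block at level l into two half-size buddies; the upper buddy is
+  // pushed onto the level l - 1 free list, while 'block' becomes the lower half.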
+ void divideBlock(Block *block, int32_t l)
+ {
+ int32_t size = ((block->size + sizeof(Block)) / 2) - sizeof(Block);
+
+ removeFromBlocks(block, l);
+
+ // there is no need to add to the free_blocks list here
+ block->is_free = true;
+ block->size = size;
+ block->self = block;
+
+ Block *buddy;
+ buddy = (Block *)((uint8_t *)block + sizeof(Block) + size);
+ buddy->is_free = true;
+ buddy->size = size;
+ buddy->self = buddy;
+
+ addToBlocks(buddy, l - 1);
+ }
+
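+  // Tries to coalesce 'block' with its buddy (located by XOR-ing the block's offset
+  // with its full size); returns the merged block, or nullptr if the buddy is in use
+  // or has a different size.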
+ Block *mergeBlock(Block *block)
+ {
+ Block *buddy;
+
+ const int32_t l = lowerLog2(block->size + sizeof(Block));
+
+ const int64_t address = ((uint8_t *)block - (uint8_t *)_start_block);
+ buddy = (Block *)((address ^ (1 << l)) + (uint8_t *)_start_block);
+
+ if (!buddy->is_free || buddy->size != block->size)
+ return nullptr;
+
+ if (block > buddy)
+ {
+ Block *x = block;
+ block = buddy;
+ buddy = x;
+ }
+
+ removeFromBlocks(block, l);
+ removeFromBlocks(buddy, l);
+
+ block->size = block->size * 2 + sizeof(Block);
+ block->is_free = true;
+ block->self = block;
+
+ addToBlocks(block, l + 1);
+
+ return block;
+ }
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_BUDDY_MEMORY_MANAGER_H
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h
new file mode 100644
index 000000000..375b1ae20
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/GraphBuilderRegistry.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
+#define __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
+
+#include <luci/Import/GraphBuilderRegistry.h>
+
+namespace luci_interpreter
+{
+
+/**
+ * @brief Creates and returns a GraphBuilderSource that allows importing a model without
+ *        copying constant buffers from the model's file.
+ *
+ * @warning Use this source only when the model's buffer stays alive longer than the Interpreter.
+ */
+std::unique_ptr<luci::GraphBuilderSource> source_without_constant_copying();
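+
+// Usage sketch (an assumption, mirroring the importer API from compiler/luci/import):
+//
+//   auto source = luci_interpreter::source_without_constant_copying();
+//   luci::Importer importer(source.get());
+//   auto module = importer.importModule(circle_model);
+//   luci_interpreter::Interpreter interpreter(module.get());
+//
+// Here 'circle_model' must outlive the interpreter, since constant buffers are not copied.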
+
+} // namespace luci_interpreter
+
+#endif // __LUCI_INTERPRETER_GRAPH_BUILDER_REGISTRY__
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/Interpreter.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/Interpreter.h
new file mode 100644
index 000000000..8e2f457a5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/Interpreter.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_INTERPRETER_H
+#define LUCI_INTERPRETER_INTERPRETER_H
+
+#include "luci_interpreter/core/Tensor.h"
+
+#include <luci/IR/Nodes/CircleInput.h>
+#include <luci/IR/Nodes/CircleOutput.h>
+
+#include "luci_interpreter/MemoryManager.h"
+#include <luci/IR/Module.h>
+
+#include <memory>
+#include <vector>
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class ExecutionObserver
+{
+public:
+ virtual ~ExecutionObserver();
+
+ // Called when the value of a tensor has been updated during execution.
+ virtual void postTensorWrite(const luci::CircleNode *node, const Tensor *tensor);
+
+ // Called before / after executing an operator.
+ // Note that these methods are not called for auxiliary operators (CircleInput, CircleOutput,
+ // CircleConst and Circle*Out).
+ virtual void preOperatorExecute(const luci::CircleNode *node);
+ virtual void postOperatorExecute(const luci::CircleNode *node);
+};
+
+class Interpreter
+{
+public:
+ explicit Interpreter(const luci::Module *module);
+
+ explicit Interpreter(const luci::Module *module, IMemoryManager *memory_manager);
+
+ ~Interpreter();
+
+ void writeInputTensor(const luci::CircleInput *input_node, const void *data, size_t data_size);
+
+ void readOutputTensor(const luci::CircleOutput *output_node, void *data, size_t data_size);
+
+ void interpret();
+
+ void attachObserver(ExecutionObserver *observer);
+
+ const Tensor *getTensor(const loco::Node *node) { return _node_to_tensor[node]; }
+
+private:
+  // _default_memory_manager must be declared before _runtime_module: members are
+  // destroyed in reverse declaration order, and the runtime module may still use
+  // the memory manager while it is being destroyed.
+ std::unique_ptr<IMemoryManager> _default_memory_manager = nullptr;
+ std::unique_ptr<class RuntimeModule> _runtime_module;
+
+ // Observer functionality support.
+ std::unique_ptr<struct RuntimeToIR> _runtime_to_ir;
+ std::unordered_map<const loco::Node *, Tensor *> _node_to_tensor;
+ std::unique_ptr<class EventNotifier> _event_notifier;
+ std::vector<ExecutionObserver *> _observers;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_INTERPRETER_H
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/MemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/MemoryManager.h
new file mode 100644
index 000000000..f32c52095
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/MemoryManager.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_MEMORY_MANAGER_H
+
+#include "luci_interpreter/core/DataType.h"
+#include "luci_interpreter/core/Tensor.h"
+
+namespace luci_interpreter
+{
+
+class IMemoryManager
+{
+public:
+ virtual void allocate_memory(luci_interpreter::Tensor &tensor) = 0;
+ virtual void release_memory(luci_interpreter::Tensor &tensor) = 0;
+
+ virtual ~IMemoryManager() = default;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_MEMORY_MANAGER_H
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h
new file mode 100644
index 000000000..658a1c609
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/SimpleMemoryManager.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
+
+#include "luci_interpreter/MemoryManager.h"
+
+namespace luci_interpreter
+{
+
+class SimpleMemoryManager : public IMemoryManager
+{
+public:
+ void allocate_memory(luci_interpreter::Tensor &tensor) final;
+ void release_memory(luci_interpreter::Tensor &tensor) final;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_SIMPLE_MEMORY_MANAGER_H
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h
new file mode 100644
index 000000000..ded7bde79
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/StaticMemoryManager.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
+
+#include "luci_interpreter/MemoryManager.h"
+
+namespace luci_interpreter
+{
+
+// Used for allocations in a static buffer, using offsets defined in the luci model.
+class StaticMemoryManager : public IMemoryManager
+{
+public:
+ StaticMemoryManager() = delete;
+
+ explicit StaticMemoryManager(uint8_t *buffer_ptr) : _buffer_ptr(buffer_ptr)
+ { /* Do nothing */
+ }
+
+ void allocate_memory(luci_interpreter::Tensor &tensor) final;
+ void release_memory(luci_interpreter::Tensor &tensor) final;
+
+private:
+ // Stores a pointer to the beginning of the allocated memory buffer.
+ uint8_t *_buffer_ptr;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_STATIC_MEMORY_MANAGER_H
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/TestMemoryManager.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/TestMemoryManager.h
new file mode 100644
index 000000000..397bbed76
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/TestMemoryManager.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
+#define LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
+
+#include "luci_interpreter/MemoryManager.h"
+
+namespace luci_interpreter
+{
+// Memory manager for use in kernel tests. It eliminates the need to manually delete
+// allocated memory in tests: the manager remembers all of its allocations and deletes
+// them in its destructor.
+class TestMemoryManager : public IMemoryManager
+{
+public:
+ void allocate_memory(luci_interpreter::Tensor &tensor) final;
+ void release_memory(luci_interpreter::Tensor &tensor) final;
+
+ ~TestMemoryManager() override
+ {
+ for (auto allocation : allocations)
+ {
+ delete[] allocation;
+ }
+ }
+
+private:
+ std::vector<uint8_t *> allocations;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_TEST_MEMORY_MANAGER_H
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/DataType.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/DataType.h
new file mode 100644
index 000000000..27bf719b5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/DataType.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_DATATYPE_H
+#define LUCI_INTERPRETER_CORE_DATATYPE_H
+
+#include <loco/IR/DataType.h>
+#include <loco/IR/DataTypeTraits.h>
+
+#include <cstddef>
+
+namespace luci_interpreter
+{
+
+using DataType = loco::DataType;
+
+template <DataType DT> using DataTypeImpl = loco::DataTypeImpl<DT>;
+
+inline size_t getDataTypeSize(DataType data_type) { return loco::size(data_type); }
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_DATATYPE_H
diff --git a/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h
new file mode 100644
index 000000000..bb9ff6d4a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/include/luci_interpreter/core/Tensor.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_TENSOR_H
+#define LUCI_INTERPRETER_CORE_TENSOR_H
+
+#include "luci_interpreter/core/DataType.h"
+
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <type_traits>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+class Shape
+{
+public:
+ explicit Shape(int rank) : _dims(rank, 0) {}
+
+ Shape(std::initializer_list<int32_t> dims) : _dims(dims.begin(), dims.end()) {}
+
+  int num_dims() const { return static_cast<int>(_dims.size()); }
+
+ int32_t dim(int i) const
+ {
+ assert(i >= 0 && i < static_cast<int>(_dims.size()));
+ return _dims[i];
+ }
+
+ int32_t &dim(int i)
+ {
+ assert(i >= 0 && i < static_cast<int>(_dims.size()));
+ return _dims[i];
+ }
+
+ int32_t num_elements() const
+ {
+ int32_t result = 1;
+ for (const int32_t dim : _dims)
+ {
+ result *= dim;
+ }
+ return result;
+ }
+
+ bool operator==(const Shape &other) const { return _dims == other._dims; }
+
+ bool operator!=(const Shape &other) const { return !operator==(other); }
+
+private:
+ std::vector<int32_t> _dims;
+};
+
+// Tensor affine quantization parameters.
+//
+// The relationship between real and quantized values:
+// real_value = (quantized_value - zero_point) * scale
+//
+// In per-tensor case, 'scale' and 'zero_point' are one element each.
+// In per-channel case, 'scale' and 'zero_point' are N elements each, where N is the size
+// of the quantized dimension.
+//
+// Note that due to historical and performance reasons, per-tensor quantization uses unsigned
+// integer types, while per-channel uses signed types assuming 'zero_point' == 0.
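+//
+// Example: with per-tensor scale = 0.5 and zero_point = 128, the quantized value 200
+// represents the real value (200 - 128) * 0.5 = 36.0.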
+struct AffineQuantization
+{
+ std::vector<float> scale;
+ std::vector<int32_t> zero_point;
+ int32_t quantized_dimension;
+};
+
+class Tensor
+{
+public:
+ Tensor(DataType element_type, Shape shape, AffineQuantization quantization, std::string name);
+
+ DataType element_type() const { return _element_type; }
+
+ const Shape &shape() const { return _shape; }
+
+ float scale() const
+ {
+ assert(_quantization.scale.size() == 1);
+ return _quantization.scale[0];
+ }
+
+ int32_t zero_point() const
+ {
+ assert(_quantization.zero_point.size() == 1);
+ return _quantization.zero_point[0];
+ }
+
+ const std::vector<float> &scales() const { return _quantization.scale; }
+
+ const std::vector<int32_t> &zero_points() const { return _quantization.zero_point; }
+
+ int32_t quantized_dimension() const { return _quantization.quantized_dimension; }
+
+ template <typename T> const T *data() const
+ {
+ static_assert(std::is_same<uint8_t, char>::value or
+ std::is_same<uint8_t, unsigned char>::value);
+ return reinterpret_cast<const T *>(_data);
+ }
+
+ template <typename T> T *data()
+ {
+ static_assert(std::is_same<uint8_t, char>::value or
+ std::is_same<uint8_t, unsigned char>::value);
+ return reinterpret_cast<T *>(_data);
+ }
+
+ const std::string &name() const { return _name; }
+
+ void readData(void *data_ptr, size_t data_size) const;
+
+ void writeData(const void *data_ptr, size_t data_size);
+
+ void resize(const Shape &new_shape);
+
+  void set_data_buffer(uint8_t *buffer)
+  {
+    _data_allocated = (buffer != nullptr);
+    _data = buffer;
+  }
+
+ bool is_observable() const { return _is_observable; }
+
+ void set_observable(bool value) { _is_observable = value; }
+
+ bool is_allocatable() const { return _is_allocatable; }
+
+ void set_allocatable(bool value) { _is_allocatable = value; }
+
+ bool is_data_allocated() const { return _data_allocated; }
+
+ int32_t get_offset() const { return _offset; }
+
+ void set_offset(int32_t offset) { _offset = offset; }
+
+private:
+ DataType _element_type;
+ Shape _shape;
+ AffineQuantization _quantization;
+ uint8_t *_data;
+ std::string _name;
+ bool _data_allocated;
+  // Writes to the tensor are reported to registered Observers only if the tensor is
+  // observable. This is needed for tensors used in a kernel's implementation but not
+  // present in the original model.
+ bool _is_observable = true;
+  // The memory manager is invoked for a tensor only if it is "allocatable".
+  // Kernel configuration may disable allocation of tensors that are not needed for a
+  // particular operation.
+ bool _is_allocatable = true;
+ // Used by static memory manager.
+ // Stores the offset from the beginning of the allocated memory buffer.
+ int32_t _offset = -1;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_TENSOR_H
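
A short sketch of typical Tensor usage (illustrative only; buffers normally come from one
of the memory managers declared above):

    using namespace luci_interpreter;

    // A 2x3 float tensor backed by an externally owned buffer.
    Tensor t(DataType::FLOAT32, Shape{2, 3}, AffineQuantization{}, "example");
    static uint8_t storage[2 * 3 * sizeof(float)];
    t.set_data_buffer(storage);

    float *values = t.data<float>(); // typed view over the raw byte buffer
    for (int32_t i = 0; i < t.shape().num_elements(); ++i)
      values[i] = 0.0f;
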
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
new file mode 100644
index 000000000..f0df58db3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/KernelsToBuild.lst
@@ -0,0 +1,62 @@
+REGISTER_KERNEL(Add)
+REGISTER_KERNEL(ArgMax)
+REGISTER_KERNEL(AveragePool2D)
+REGISTER_KERNEL(BatchToSpaceND)
+REGISTER_KERNEL(Cast)
+REGISTER_KERNEL(Concatenation)
+REGISTER_KERNEL(Conv2D)
+REGISTER_KERNEL(DepthToSpace)
+REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Dequantize)
+REGISTER_KERNEL(Div)
+REGISTER_KERNEL(Elu)
+REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(ExpandDims)
+REGISTER_KERNEL(Fill)
+REGISTER_KERNEL(Floor)
+REGISTER_KERNEL(FloorDiv)
+REGISTER_KERNEL(Equal)
+REGISTER_KERNEL(FullyConnected)
+REGISTER_KERNEL(Greater)
+REGISTER_KERNEL(GreaterEqual)
+REGISTER_KERNEL(If)
+REGISTER_KERNEL(InstanceNorm)
+REGISTER_KERNEL(L2Normalize)
+REGISTER_KERNEL(L2Pool2D)
+REGISTER_KERNEL(LeakyRelu)
+REGISTER_KERNEL(Less)
+REGISTER_KERNEL(LessEqual)
+REGISTER_KERNEL(LogicalAnd)
+REGISTER_KERNEL(LogicalNot)
+REGISTER_KERNEL(LogicalOr)
+REGISTER_KERNEL(Logistic)
+REGISTER_KERNEL(Maximum)
+REGISTER_KERNEL(MaxPool2D)
+REGISTER_KERNEL(Minimum)
+REGISTER_KERNEL(MirrorPad)
+REGISTER_KERNEL(Mul)
+REGISTER_KERNEL(Neg)
+REGISTER_KERNEL(NotEqual)
+REGISTER_KERNEL(Pad)
+REGISTER_KERNEL(PadV2)
+REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Quantize)
+REGISTER_KERNEL(Reshape)
+REGISTER_KERNEL(ResizeBilinear)
+REGISTER_KERNEL(ResizeNearestNeighbor)
+REGISTER_KERNEL(Rsqrt)
+REGISTER_KERNEL(Shape)
+REGISTER_KERNEL(Softmax)
+REGISTER_KERNEL(SpaceToBatchND)
+REGISTER_KERNEL(SpaceToDepth)
+REGISTER_KERNEL(StridedSlice)
+REGISTER_KERNEL(Sqrt)
+REGISTER_KERNEL(Square)
+REGISTER_KERNEL(SquaredDifference)
+REGISTER_KERNEL(Squeeze)
+REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(SVDF)
+REGISTER_KERNEL(Tanh)
+REGISTER_KERNEL(Transpose)
+REGISTER_KERNEL(TransposeConv)
+REGISTER_KERNEL(While)
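
The list is plain X-macro input: a consumer defines REGISTER_KERNEL before including the
file, so one list can expand into declarations, registration calls, or build rules. A
hedged sketch of such a consumer (hypothetical helper names; the real consumer lives in
the interpreter's kernel-builder sources, which are not part of this diff):

    // Expand each entry into a registration statement.
    #define REGISTER_KERNEL(name) register_kernel_builder(#name, build_kernel_Circle##name);
    #include "KernelsToBuild.lst"
    #undef REGISTER_KERNEL
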
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALArgMax.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALArgMax.h
new file mode 100644
index 000000000..21e63296d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALArgMax.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H
+#define LUCI_INTERPRETER_PAL_ARGMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/arg_min_max.h>
+
+#include <functional>
+
+namespace luci_interpreter_pal
+{
+template <typename T1, typename T2, typename T3>
+static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data,
+ const T2 *axis, const tflite::RuntimeShape &output_shape,
+ T3 *output_data, const std::greater<T1> cmp)
+{
+ tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ARGMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h
new file mode 100644
index 000000000..a274afb7e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALAveragePool2d.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+#include <arm_nn_types.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void AveragePool(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data)
+{
+ {
+    // MARK: this operation is not yet supported for this type
+ assert(false && "AveragePool NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ }
+}
+
+template <>
+inline void AveragePool<int8_t>(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape,
+ int8_t *scratchpad_data)
+{
+ assert(input_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ assert(scratchpad_data != nullptr);
+
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ assert(batches == 1);
+
+ const int depth = tflite::MatchingDim(input_shape, 3, output_shape, 3);
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = 1;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = 1;
+ output_dims.h = output_shape.Dims(1);
+ output_dims.w = output_shape.Dims(2);
+ output_dims.c = depth;
+
+ cmsis_nn_pool_params pool_params;
+ pool_params.stride.h = params.stride_height;
+ pool_params.stride.w = params.stride_width;
+ pool_params.padding.h = params.padding_values.height;
+ pool_params.padding.w = params.padding_values.width;
+ pool_params.activation.min = params.quantized_activation_min;
+ pool_params.activation.max = params.quantized_activation_max;
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = 1;
+ filter_dims.h = params.filter_height;
+ filter_dims.w = params.filter_width;
+ filter_dims.c = 1;
+
+ cmsis_nn_context ctx;
+ ctx.buf = scratchpad_data;
+ ctx.size = scratchpad_shape.Dims(0);
+ auto res = arm_avgpool_s8(&ctx, &pool_params, &input_dims, input_data, &filter_dims, &output_dims,
+ output_data);
+ assert(res == ARM_MATH_SUCCESS);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ if (input_data_type == luci_interpreter::DataType::S8)
+ {
+ assert(input_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+
+ const int32_t output_width = output_shape.Dims(2);
+ const int32_t depth = tflite::MatchingDim(input_shape, 3, output_shape, 3);
+
+ const int32_t buf_size = arm_avgpool_s8_get_buffer_size(output_width, depth);
+ auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type));
+
+ luci_interpreter::Shape scratchpad_shape{buf_size * data_type_size};
+ scratchpad->resize(scratchpad_shape);
+ }
+ else
+ {
+ scratchpad->set_allocatable(false);
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
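
SetupScratchpadTensor and the int8_t AveragePool specialization are two halves of one
flow: at kernel-configure time the scratchpad is sized via arm_avgpool_s8_get_buffer_size(),
and at execute time the planned buffer is handed to arm_avgpool_s8() through the ctx
argument. A hedged sketch of the kernel side (all surrounding names are illustrative):

    // configure(): size the scratchpad before memory planning runs.
    luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input_data_type,
                                                input_shape, output_shape);

    // execute(): forward the planned buffer to the CMSIS-NN fast path.
    luci_interpreter_pal::AveragePool(params, input_shape, input_data, output_shape,
                                      output_data, scratchpad_shape,
                                      scratchpad->data<int8_t>());
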
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h
new file mode 100644
index 000000000..4dd77ffdc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALBatchToSpaceND.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+
+#include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::BatchToSpaceND(
+ unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, crops_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h
new file mode 100644
index 000000000..cfb84ea60
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALConv2d.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
+#define LUCI_INTERPRETER_PAL_CONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/conv.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
+#include <arm_nn_types.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &filter_shape,
+ const float *filter_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, const tflite::RuntimeShape &output_shape,
+ float *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ float *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data,
+ tflite::RuntimeShape(), nullptr);
+}
+
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ uint8 *scratchpad_data)
+{
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
+ scratchpad_data, nullptr);
+}
+
+static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
+ const int32_t *shifts, const tflite::RuntimeShape &input_shape,
+ const int8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ int8 *scratchpad_data)
+{
+ if (scratchpad_data)
+ {
+ cmsis_nn_conv_params conv_params;
+ conv_params.dilation.h = params.dilation_height_factor;
+ conv_params.dilation.w = params.dilation_width_factor;
+
+ assert(conv_params.dilation.h == 1);
+ assert(conv_params.dilation.w == 1);
+
+ conv_params.input_offset = params.input_offset;
+ conv_params.output_offset = params.output_offset;
+ conv_params.stride.h = params.stride_height;
+ conv_params.stride.w = params.stride_width;
+ conv_params.padding.h = params.padding_values.height;
+ conv_params.padding.w = params.padding_values.width;
+ conv_params.activation.min = params.quantized_activation_min;
+ conv_params.activation.max = params.quantized_activation_max;
+
+ cmsis_nn_per_channel_quant_params quant_params;
+ quant_params.multiplier = const_cast<int32_t *>(mult);
+ quant_params.shift = const_cast<int32_t *>(shifts);
+
+ assert(conv_params.activation.min <= conv_params.activation.max);
+ assert(input_shape.DimensionsCount() == 4);
+ assert(filter_shape.DimensionsCount() == 4);
+ assert(output_shape.DimensionsCount() == 4);
+ const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+ const int output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3);
+ if (bias_data)
+ {
+ assert(bias_shape.FlatSize() == output_depth);
+ }
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batch_size;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = input_depth;
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = output_depth;
+ filter_dims.h = filter_shape.Dims(1);
+ filter_dims.w = filter_shape.Dims(2);
+ filter_dims.c = input_depth;
+
+ cmsis_nn_dims bias_dims;
+ bias_dims.n = 1;
+ bias_dims.h = 1;
+ bias_dims.w = 1;
+ bias_dims.c = output_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batch_size;
+ output_dims.h = output_shape.Dims(1);
+ output_dims.w = output_shape.Dims(2);
+ output_dims.c = output_depth;
+
+ cmsis_nn_context ctx;
+ ctx.buf = scratchpad_data;
+ ctx.size = scratchpad_shape.Dims(0);
+
+ auto res = arm_convolve_wrapper_s8(&ctx, &conv_params, &quant_params, &input_dims, input_data,
+ &filter_dims, filter_data, &bias_dims, bias_data,
+ &output_dims, output_data);
+ assert(res == ARM_MATH_SUCCESS);
+ }
+ else
+ {
+ tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
+ filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data);
+ }
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::ConvParams &params,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ cmsis_nn_conv_params conv_params;
+ conv_params.dilation.h = params.dilation_height_factor;
+ conv_params.dilation.w = params.dilation_width_factor;
+
+ if (input_data_type == loco::DataType::S8 && conv_params.dilation.h == 1 &&
+ conv_params.dilation.w == 1)
+ {
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+ const int32_t output_depth = tflite::MatchingDim(filter_shape, 0, output_shape, 3);
+ const int32_t filter_height = filter_shape.Dims(1);
+ const int32_t filter_width = filter_shape.Dims(2);
+ const int32_t output_height = output_shape.Dims(1);
+ const int32_t output_width = output_shape.Dims(2);
+
+ conv_params.input_offset = params.input_offset;
+ conv_params.output_offset = params.output_offset;
+ conv_params.stride.h = params.stride_height;
+ conv_params.stride.w = params.stride_width;
+ conv_params.padding.h = params.padding_values.height;
+ conv_params.padding.w = params.padding_values.width;
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batches;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = input_depth;
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = output_depth;
+ filter_dims.h = filter_height;
+ filter_dims.w = filter_width;
+ filter_dims.c = input_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batches;
+ output_dims.h = output_height;
+ output_dims.w = output_width;
+ output_dims.c = output_depth;
+
+ const int32_t buf_size = arm_convolve_wrapper_s8_get_buffer_size(&conv_params, &input_dims,
+ &filter_dims, &output_dims);
+
+ luci_interpreter::Shape scratchpad_shape{buf_size};
+ scratchpad->resize(scratchpad_shape);
+ }
+ else
+ {
+ scratchpad->set_allocatable(false);
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_CONV2D_H
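
ConvPerChannel's 'mult' and 'shifts' arguments are per-output-channel fixed-point factors
for the effective scale input_scale * filter_scale[c] / output_scale. A hedged sketch of
deriving them with TFLite's QuantizeMultiplier helper (the scale and vector names are
illustrative):

    #include <tensorflow/lite/kernels/internal/quantization_util.h>
    #include <vector>

    std::vector<int32_t> mult(output_depth);
    std::vector<int32_t> shifts(output_depth);
    for (int c = 0; c < output_depth; ++c)
    {
      const double effective_scale =
        static_cast<double>(input_scale) * filter_scales[c] / output_scale;
      int shift = 0;
      tflite::QuantizeMultiplier(effective_scale, &mult[c], &shift);
      shifts[c] = shift;
    }
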
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h
new file mode 100644
index 000000000..8463e571e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthToSpace.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+
+#include <tensorflow/lite/kernels/internal/reference/depth_to_space.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::DepthToSpace(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h
new file mode 100644
index 000000000..120dcd803
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &filter_shape,
+ const T *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
+ T *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ T *scratchpad_data)
+{
+ {
+    // MARK: this operation is not yet supported for this type
+ assert(false && "DepthwiseConvPerChannel NYI");
+ (void)params;
+ (void)output_multiplier;
+ (void)output_shift;
+ (void)input_shape;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ }
+}
+
+template <>
+inline void DepthwiseConvPerChannel<int8_t>(
+ const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
+{
+ if (scratchpad_data)
+ {
+ cmsis_nn_dw_conv_params dw_conv_params;
+ dw_conv_params.dilation.h = params.dilation_height_factor;
+ dw_conv_params.dilation.w = params.dilation_width_factor;
+ assert(dw_conv_params.dilation.h == 1);
+ assert(dw_conv_params.dilation.w == 1);
+
+ dw_conv_params.input_offset = params.input_offset;
+ dw_conv_params.output_offset = params.output_offset;
+ dw_conv_params.stride.h = params.stride_height;
+ dw_conv_params.stride.w = params.stride_width;
+ dw_conv_params.padding.h = params.padding_values.height;
+ dw_conv_params.padding.w = params.padding_values.width;
+
+ dw_conv_params.activation.min = params.quantized_activation_min;
+ dw_conv_params.activation.max = params.quantized_activation_max;
+ dw_conv_params.ch_mult = params.depth_multiplier;
+
+ cmsis_nn_per_channel_quant_params quant_params;
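+    // Note: these locals shadow the per-channel 'output_multiplier' / 'output_shift'
+    // parameters; the CMSIS-NN call below receives a single per-tensor pair from 'params'.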
+ int32_t output_multiplier = params.output_multiplier;
+ int32_t output_shift = params.output_shift;
+
+ quant_params.multiplier = &output_multiplier;
+ quant_params.shift = &output_shift;
+
+ assert(dw_conv_params.activation.min <= dw_conv_params.activation.max);
+ const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3);
+ if (bias_data)
+ {
+ assert(bias_shape.FlatSize() == output_depth);
+ }
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batch_size;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = input_shape.Dims(3);
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = filter_shape.Dims(0);
+ filter_dims.h = filter_shape.Dims(1);
+ filter_dims.w = filter_shape.Dims(2);
+ filter_dims.c = output_depth;
+
+ cmsis_nn_dims bias_dims;
+ bias_dims.n = 1;
+ bias_dims.h = 1;
+ bias_dims.w = 1;
+ bias_dims.c = output_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batch_size;
+ output_dims.h = output_shape.Dims(1);
+ output_dims.w = output_shape.Dims(2);
+ output_dims.c = output_depth;
+
+ cmsis_nn_context ctx;
+ ctx.buf = scratchpad_data;
+ ctx.size = scratchpad_shape.Dims(0);
+
+ auto res = arm_depthwise_conv_wrapper_s8(&ctx, &dw_conv_params, &quant_params, &input_dims,
+ input_data, &filter_dims, filter_data, &bias_dims,
+ bias_data, &output_dims, output_data);
+ assert(res == ARM_MATH_SUCCESS);
+ }
+ else
+ {
+ tflite::reference_integer_ops::DepthwiseConvPerChannel(
+ params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data);
+ }
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const tflite::DepthwiseParams &params,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ cmsis_nn_dw_conv_params dw_conv_params;
+ dw_conv_params.dilation.h = params.dilation_height_factor;
+ dw_conv_params.dilation.w = params.dilation_width_factor;
+
+ if (input_data_type == loco::DataType::S8 && dw_conv_params.dilation.h == 1 &&
+ dw_conv_params.dilation.w == 1)
+ {
+ const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3);
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batch_size;
+ input_dims.h = input_shape.Dims(1);
+ input_dims.w = input_shape.Dims(2);
+ input_dims.c = input_shape.Dims(3);
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = filter_shape.Dims(0);
+ filter_dims.h = filter_shape.Dims(1);
+ filter_dims.w = filter_shape.Dims(2);
+ filter_dims.c = output_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batch_size;
+ output_dims.h = output_shape.Dims(1);
+ output_dims.w = output_shape.Dims(2);
+ output_dims.c = output_depth;
+
+ const int32_t buf_size = arm_depthwise_conv_wrapper_s8_get_buffer_size(
+ &dw_conv_params, &input_dims, &filter_dims, &output_dims);
+
+ auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type));
+
+ luci_interpreter::Shape scratchpad_shape{buf_size * data_type_size};
+ scratchpad->resize(scratchpad_shape);
+ }
+ else
+ {
+ scratchpad->set_allocatable(false);
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDequantize.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDequantize.h
new file mode 100644
index 000000000..15ff0327b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALDequantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter_pal
+{
+
+template <typename T>
+static inline void Dequantize(tflite::DequantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_integer_ops::Dequantize<T>(params, input_shape, input_data, output_shape,
+ output_data);
+}
+
+static inline void Dequantize(tflite::DequantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const uint8_t *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALElu.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALElu.h
new file mode 100644
index 000000000..4089d0a0c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALElu.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ELU_H
+#define LUCI_INTERPRETER_PAL_ELU_H
+
+#include <tensorflow/lite/kernels/internal/reference/elu.h>
+
+namespace luci_interpreter_pal
+{
+
+static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::Elu(input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ELU_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALFullyConnected.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALFullyConnected.h
new file mode 100644
index 000000000..32e905761
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALFullyConnected.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+
+#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
+#include <arm_nnfunctions.h>
+
+#include <memory>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void FullyConnected(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &filter_shape, const T *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ {
+    // MARK: this operation is not yet supported for this type
+ assert(false && "FullyConnected NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+ }
+}
+
+template <>
+inline void
+FullyConnected<int8_t>(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+ assert(output_shape.DimensionsCount() == 2);
+
+ const int batches = output_shape.Dims(0);
+ const int output_depth = output_shape.Dims(1);
+
+ const int filter_dim_count = filter_shape.DimensionsCount();
+ const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
+
+ cmsis_nn_fc_params fc_params;
+ fc_params.input_offset = params.input_offset;
+ fc_params.output_offset = params.output_offset;
+ fc_params.filter_offset = params.weights_offset;
+ fc_params.activation.min = params.quantized_activation_min;
+ fc_params.activation.max = params.quantized_activation_max;
+
+ cmsis_nn_per_tensor_quant_params quant_params;
+ quant_params.multiplier = params.output_multiplier;
+ quant_params.shift = params.output_shift;
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = batches;
+ input_dims.h = 1;
+ input_dims.w = 1;
+ input_dims.c = accum_depth;
+
+ cmsis_nn_dims filter_dims;
+ filter_dims.n = accum_depth;
+ filter_dims.h = 1;
+ filter_dims.w = 1;
+ filter_dims.c = output_depth;
+
+ cmsis_nn_dims bias_dims;
+ bias_dims.n = 1;
+ bias_dims.h = 1;
+ bias_dims.w = 1;
+ bias_dims.c = output_depth;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = batches;
+ output_dims.h = 1;
+ output_dims.w = 1;
+ output_dims.c = output_depth;
+
+ int32_t buf_size = arm_fully_connected_s8_get_buffer_size(&filter_dims);
+ auto buffer = std::make_unique<int8_t[]>(buf_size);
+ assert(buffer != nullptr);
+
+ cmsis_nn_context ctx;
+ ctx.buf = buffer.get();
+ ctx.size = buf_size;
+
+ auto res =
+ arm_fully_connected_s8(&ctx, &fc_params, &quant_params, &input_dims, input_data, &filter_dims,
+ filter_data, &bias_dims, bias_data, &output_dims, output_data);
+ assert(res == ARM_MATH_SUCCESS);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
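
Design note: unlike the Conv2D and AveragePool2D PALs above, this FullyConnected
specialization does not route its working memory through a planner-managed scratchpad
tensor; it allocates the CMSIS-NN buffer per call with std::make_unique, trading a heap
allocation on every invocation for a simpler kernel interface.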
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Normalize.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Normalize.h
new file mode 100644
index 000000000..f84742a44
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Normalize.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+
+#include <tensorflow/lite/kernels/internal/reference/l2normalization.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Normalization(const tflite::L2NormalizationParams &op_params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::L2Normalization(op_params, input_shape, input_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h
new file mode 100644
index 000000000..38a302fc6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALL2Pool2D.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H
+#define LUCI_INTERPRETER_PAL_L2POOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Pool(const tflite::PoolParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::reference_ops::L2Pool(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h
new file mode 100644
index 000000000..9ccd2224f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALLeakyRelu.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H
+#define LUCI_INTERPRETER_PAL_LEAKYRELU_H
+
+#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h>
+
+namespace luci_interpreter_pal
+{
+static inline void LeakyRelu(const tflite::LeakyReluParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALMul.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALMul.h
new file mode 100644
index 000000000..347a97a83
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALMul.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_MUL_H
+#define LUCI_INTERPRETER_PAL_MUL_H
+
+#include <tensorflow/lite/kernels/internal/reference/mul.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void
+BroadcastMul4DSlow(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_MUL_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALNeg.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALNeg.h
new file mode 100644
index 000000000..be5903a0c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALNeg.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_NEG_H
+#define LUCI_INTERPRETER_PAL_NEG_H
+
+#include <tensorflow/lite/kernels/internal/reference/neg.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_NEG_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALQuantize.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALQuantize.h
new file mode 100644
index 000000000..6046789ae
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALQuantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
+#define LUCI_INTERPRETER_PAL_QUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Quantize(tflite::QuantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+template <typename Input, typename Output>
+static inline void Requantize(const Input *input_data, int32_t size,
+ int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+ int32_t input_zero_point, int32_t output_zero_point,
+ Output *output_data)
+{
+ tflite::reference_ops::Requantize(input_data, size, effective_scale_multiplier,
+ effective_scale_shift, input_zero_point, output_zero_point,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
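
Requantize re-expresses values from one quantized domain in another: conceptually
output = (input - input_zero_point) * (input_scale / output_scale) + output_zero_point,
with the scale ratio pre-encoded as a fixed-point multiplier and shift. A hedged sketch
of preparing those arguments (QuantizeMultiplier is TFLite's helper; the remaining names
are illustrative):

    #include <tensorflow/lite/kernels/internal/quantization_util.h>

    int32_t effective_scale_multiplier = 0;
    int effective_scale_shift = 0;
    tflite::QuantizeMultiplier(static_cast<double>(input_scale) / output_scale,
                               &effective_scale_multiplier, &effective_scale_shift);

    luci_interpreter_pal::Requantize(input_data, num_elements, effective_scale_multiplier,
                                     effective_scale_shift, input_zp, output_zp,
                                     output_data);
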
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h
new file mode 100644
index 000000000..cc9f0fd54
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeBilinear.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_bilinear.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeBilinear(const tflite::ResizeBilinearParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::ResizeBilinear(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h
new file mode 100644
index 000000000..f4d5a6ed3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALResizeNearestNeighbor.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSVDF.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSVDF.h
new file mode 100644
index 000000000..a4a5b2a78
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSVDF.h
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <arm_nn_types.h>
+#include <arm_nnfunctions.h>
+
+#include <cstdint>
+#include <stdexcept>
+
+namespace luci_interpreter_pal
+{
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, int16_t *activation_state_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+ int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+ int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+ const int32_t rank = params.rank;
+ const int32_t batch_size = input_shape.Dims(0);
+ const int32_t num_filters = weight_feature_shape.Dims(0);
+ const int32_t memory_size = weight_time_shape.Dims(1);
+
+ cmsis_nn_dims input_dims;
+ input_dims.n = input_shape.Dims(0);
+ input_dims.h = input_shape.Dims(1);
+
+ cmsis_nn_dims weights_feature_dims;
+ weights_feature_dims.n = weight_feature_shape.Dims(0);
+ weights_feature_dims.h = weight_feature_shape.Dims(1);
+
+ cmsis_nn_dims weights_time_dims;
+ weights_time_dims.n = weight_time_shape.Dims(0);
+ weights_time_dims.h = weight_time_shape.Dims(1);
+
+ cmsis_nn_dims bias_dims;
+ bias_dims.n = bias_shape.Dims(0);
+
+ cmsis_nn_dims state_dims;
+ state_dims.n = batch_size;
+ state_dims.h = memory_size * num_filters;
+
+ cmsis_nn_dims output_dims;
+ output_dims.n = output_shape.Dims(0);
+ output_dims.h = output_shape.Dims(1);
+
+ cmsis_nn_svdf_params svdf_params;
+ svdf_params.rank = params.rank;
+ svdf_params.input_offset = input_zp;
+ svdf_params.output_offset = output_zp;
+
+ svdf_params.input_activation.min = INT16_MIN;
+ svdf_params.input_activation.max = INT16_MAX;
+
+ svdf_params.output_activation.min = INT8_MIN;
+ svdf_params.output_activation.max = INT8_MAX;
+
+ cmsis_nn_per_tensor_quant_params in_quant_params;
+ in_quant_params.multiplier = scale_1_a;
+ in_quant_params.shift = scale_1_b;
+
+ cmsis_nn_per_tensor_quant_params out_quant_params;
+ out_quant_params.multiplier = scale_2_a;
+ out_quant_params.shift = scale_2_b;
+
+ cmsis_nn_context scratch_ctx;
+ scratch_ctx.buf = scratchpad_data;
+
+ cmsis_nn_context scratch_output_ctx;
+ scratch_output_ctx.buf = output_temp_data;
+
+ arm_svdf_s8(&scratch_ctx, &scratch_output_ctx, &svdf_params, &in_quant_params, &out_quant_params,
+ &input_dims, input_data, &state_dims, activation_state_data, &weights_feature_dims,
+ weight_feature_data, &weights_time_dims, weight_time_data, &bias_dims, bias_data,
+ &output_dims, output_data);
+}
+
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, float *scratchpad_data, float *activation_state_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ const int32_t rank = params.rank;
+ const int32_t batch_size = input_shape.Dims(0);
+ const int32_t input_size = input_shape.Dims(1);
+ const int32_t num_filters = weight_feature_shape.Dims(0);
+ const int32_t num_units = num_filters / rank;
+ const int32_t memory_size = weight_time_shape.Dims(1);
+
+ // Left shift the activation_state.
+ {
+ float *new_state_start = activation_state_data;
+ const float *old_state_start = activation_state_data + 1;
+ const float *old_state_end = activation_state_data + batch_size * num_filters * memory_size;
+ while (old_state_start != old_state_end)
+ {
+ *new_state_start++ = *old_state_start++;
+ }
+ }
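+
+  // For example, with memory_size == 4 a state row [s0 s1 s2 s3] becomes
+  // [s1 s2 s3 s3]; the stale last slot is overwritten by the matmul below.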
+
+ // Note: no need to clear the latest activation, matmul is not accumulative.
+
+ // Compute conv1d(inputs, weights_feature).
+ // The activation_state's rightmost column is used to save current cycle
+  // activation. This is achieved by starting at
+  // activation_state_data[memory_size - 1] and using a stride equal to
+  // memory_size.
+
+ // Perform batched matrix vector multiply operation:
+ {
+ const float *matrix = weight_feature_data;
+ const float *vector = input_data;
+ float *result = &activation_state_data[memory_size - 1];
+ float *result_in_batch = result;
+ for (int i = 0; i < batch_size; ++i)
+ {
+ const float *matrix_ptr = matrix;
+ for (int j = 0; j < num_filters; ++j)
+ {
+ float dot_prod = 0.0f;
+ const float *vector_in_batch = vector + i * input_size;
+ for (int k = 0; k < input_size; ++k)
+ {
+ dot_prod += *matrix_ptr++ * *vector_in_batch++;
+ }
+ *result_in_batch = dot_prod;
+ result_in_batch += memory_size;
+ }
+ }
+ }
+
+ tflite::reference_ops::ApplyTimeWeightsBiasAndActivation(
+ batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data,
+ params.activation, activation_state_data, scratchpad_data, output_data);
+}
+
+static inline void SetupScratchpadTensor(
+ const luci_interpreter::DataType &input_data_type,
+ const luci_interpreter::DataType &weight_feature_data_type,
+ luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+ luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+ luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+ const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+ const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+ if (input_data_type == loco::DataType::FLOAT32 &&
+ (weight_feature_data_type == loco::DataType::S8 ||
+ weight_feature_data_type == loco::DataType::U8))
+ {
+ (void)input_shape;
+ (void)weight_time_shape;
+ (void)scratchpad_3;
+ (void)scratchpad_4;
+ (void)scratchpad_5;
+ (void)scratchpad_6;
+
+ throw std::runtime_error("Hybrid type is not supported for cmsisnn");
+ }
+
+ // Resize scratchpad_1 tensor
+ scratchpad_1->resize({batch_size, num_filters});
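+  // {batch_size, num_filters} gives one intermediate value per (batch, filter)
+  // pair, matching the per-filter dot products an SVDF cycle produces (see
+  // FloatSVDF above).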
+
+ if (input_data_type == loco::DataType::S8)
+ {
+ // Resize scratchpad_2 for full_integer op
+ scratchpad_2->resize({batch_size, num_units});
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSoftmax.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSoftmax.h
new file mode 100644
index 000000000..6bbda4867
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSoftmax.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H
+#define LUCI_INTERPRETER_PAL_SOFTMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/softmax.h>
+#include <arm_nnfunctions.h>
+
+namespace luci_interpreter_pal
+{
+static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
+ float beta)
+{
+  // Do nothing for MCU: the CMSIS-NN kernel does not use a lookup table.
+ (void)data;
+ (void)input_scale;
+ (void)beta;
+}
+
+static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
+{
+ int32 input_beta_multiplier;
+ int input_beta_left_shift;
+ static const int kScaledDiffIntegerBits = 5;
+ tflite::PreprocessSoftmaxScaling(beta, input_scale, kScaledDiffIntegerBits,
+ &input_beta_multiplier, &input_beta_left_shift);
+
+ params->input_multiplier = input_beta_multiplier;
+ params->input_left_shift = input_beta_left_shift;
+ params->diff_min =
+ -tflite::CalculateInputRadius(kScaledDiffIntegerBits, params->input_left_shift);
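+
+  // Illustrative only: the int8 kernel below feeds these into arm_softmax_s8,
+  // which rescales (input - max(input)) by input_multiplier / input_left_shift
+  // in fixed point and treats differences below diff_min as negligible.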
+}
+
+template <typename T>
+static inline void Softmax(const tflite::SoftmaxParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  // MARK: At this moment this operation is not supported on MCU
+ assert(false && "Softmax NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)output_shape;
+ (void)output_data;
+}
+
+template <>
+inline void Softmax<int8_t>(const tflite::SoftmaxParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+ const int trailing_dim = input_shape.DimensionsCount() - 1;
+ const int outer_size = tflite::MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+ const int depth = tflite::MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+ const int32_t mult = params.input_multiplier;
+ const int32_t shift = params.input_left_shift;
+ const int32_t diff_min = params.diff_min;
+
+ arm_softmax_s8(input_data, outer_size, depth, mult, shift, diff_min, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h
new file mode 100644
index 000000000..fdddaa929
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToBatchND.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+
+#include <tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+SpaceToBatchND(const tflite::SpaceToBatchParams &params,
+ const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::SpaceToBatchND(
+ params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, paddings_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h
new file mode 100644
index 000000000..816b7f663
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSpaceToDepth.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+
+#include <tensorflow/lite/kernels/internal/reference/space_to_depth.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::SpaceToDepth(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSub.h b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSub.h
new file mode 100644
index 000000000..ea57578c6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/PALSub.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SUB_H
+#define LUCI_INTERPRETER_PAL_SUB_H
+
+#include <tensorflow/lite/kernels/internal/reference/sub.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Sub(const tflite::ArithmeticParams &params,
+ const tflite::RuntimeShape &input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &input2_shape, const T *input2_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SUB_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/cmsisnn/pal.cmake b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/pal.cmake
new file mode 100644
index 000000000..a68b363d9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/cmsisnn/pal.cmake
@@ -0,0 +1,65 @@
+macro(initialize_pal)
+ nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET)
+ nnas_find_package(CMSISSource EXACT 5.8.0 QUIET)
+
+ if (NOT TensorFlowSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: TensorFlow not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowGEMMLowpSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: gemmlowp not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowEigenSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Eigen not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowRuySource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Ruy not found")
+ return()
+ endif ()
+
+ if (NOT CMSISSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: CMSISSource not found")
+ return()
+ endif ()
+
+ set(PAL_INITIALIZED TRUE)
+endmacro()
+
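+# add_pal_to_target(TGT) builds the CMSIS-NN C sources plus a few TFLite
+# helper translation units into a static library, exposes the CMSIS and
+# TensorFlow include directories, and links the result into TGT.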
+macro(add_pal_to_target TGT)
+ target_include_directories(${TGT} PRIVATE "${PAL}")
+ target_include_directories(${TGT} PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}")
+ target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
+
+ file(GLOB_RECURSE PAL_SOURCES "${CMSISSource_DIR}/CMSIS/NN/Source/*.c")
+ list(APPEND PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc)
+ add_library(luci_interpreter_cmsisnn_pal STATIC ${PAL_SOURCES})
+ set_property(TARGET luci_interpreter_cmsisnn_pal PROPERTY POSITION_INDEPENDENT_CODE ON)
+ target_include_directories(luci_interpreter_cmsisnn_pal PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}"
+ )
+
+ add_subdirectory(${CMSISSource_DIR}/CMSIS/NN ${CMAKE_CURRENT_BINARY_DIR}/CMSISNN)
+ target_include_directories(luci_interpreter_cmsisnn_pal PUBLIC
+ "${CMSISSource_DIR}/CMSIS/NN/Include"
+ "${CMSISSource_DIR}/CMSIS/DSP/Include"
+ "${CMSISSource_DIR}/CMSIS/Core/Include")
+
+ target_link_libraries(${TGT} PRIVATE luci_interpreter_cmsisnn_pal)
+endmacro()
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/KernelsToBuild.lst b/compiler/luci-micro/luci-interpreter/pal/linux/KernelsToBuild.lst
new file mode 100644
index 000000000..8e20559f9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/KernelsToBuild.lst
@@ -0,0 +1,77 @@
+REGISTER_KERNEL(Add)
+REGISTER_KERNEL(ArgMax)
+REGISTER_KERNEL(AveragePool2D)
+REGISTER_KERNEL(BatchMatMul)
+REGISTER_KERNEL(BatchToSpaceND)
+REGISTER_KERNEL(Cast)
+REGISTER_KERNEL(Concatenation)
+REGISTER_KERNEL(Conv2D)
+REGISTER_KERNEL(DepthToSpace)
+REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Dequantize)
+REGISTER_KERNEL(Div)
+REGISTER_KERNEL(Elu)
+REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(ExpandDims)
+REGISTER_KERNEL(Fill)
+REGISTER_KERNEL(Floor)
+REGISTER_KERNEL(FloorDiv)
+REGISTER_KERNEL(Equal)
+REGISTER_KERNEL(FullyConnected)
+REGISTER_KERNEL(Gather)
+REGISTER_KERNEL(Greater)
+REGISTER_KERNEL(GreaterEqual)
+REGISTER_KERNEL(If)
+REGISTER_KERNEL(InstanceNorm)
+REGISTER_KERNEL(L2Normalize)
+REGISTER_KERNEL(L2Pool2D)
+REGISTER_KERNEL(LeakyRelu)
+REGISTER_KERNEL(Less)
+REGISTER_KERNEL(LessEqual)
+REGISTER_KERNEL(LocalResponseNormalization)
+REGISTER_KERNEL(LogicalAnd)
+REGISTER_KERNEL(LogicalNot)
+REGISTER_KERNEL(LogicalOr)
+REGISTER_KERNEL(Logistic)
+REGISTER_KERNEL(LogSoftmax)
+REGISTER_KERNEL(Maximum)
+REGISTER_KERNEL(MaxPool2D)
+REGISTER_KERNEL(Mean)
+REGISTER_KERNEL(Minimum)
+REGISTER_KERNEL(MirrorPad)
+REGISTER_KERNEL(Mul)
+REGISTER_KERNEL(Neg)
+REGISTER_KERNEL(NotEqual)
+REGISTER_KERNEL(OneHot)
+REGISTER_KERNEL(Pack)
+REGISTER_KERNEL(Pad)
+REGISTER_KERNEL(PadV2)
+REGISTER_KERNEL(Pow)
+REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Quantize)
+REGISTER_KERNEL(Relu)
+REGISTER_KERNEL(Relu6)
+REGISTER_KERNEL(Reshape)
+REGISTER_KERNEL(ResizeBilinear)
+REGISTER_KERNEL(ResizeNearestNeighbor)
+REGISTER_KERNEL(ReverseV2)
+REGISTER_KERNEL(Rsqrt)
+REGISTER_KERNEL(Shape)
+REGISTER_KERNEL(Slice)
+REGISTER_KERNEL(Softmax)
+REGISTER_KERNEL(SpaceToBatchND)
+REGISTER_KERNEL(SpaceToDepth)
+REGISTER_KERNEL(Split)
+REGISTER_KERNEL(SplitV)
+REGISTER_KERNEL(StridedSlice)
+REGISTER_KERNEL(Sqrt)
+REGISTER_KERNEL(Square)
+REGISTER_KERNEL(SquaredDifference)
+REGISTER_KERNEL(Squeeze)
+REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(SVDF)
+REGISTER_KERNEL(Tanh)
+REGISTER_KERNEL(Transpose)
+REGISTER_KERNEL(TransposeConv)
+REGISTER_KERNEL(Unpack)
+REGISTER_KERNEL(While)
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALArgMax.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALArgMax.h
new file mode 100644
index 000000000..21e63296d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALArgMax.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H
+#define LUCI_INTERPRETER_PAL_ARGMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/arg_min_max.h>
+
+#include <functional>
+
+namespace luci_interpreter_pal
+{
+template <typename T1, typename T2, typename T3>
+static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data,
+ const T2 *axis, const tflite::RuntimeShape &output_shape,
+ T3 *output_data, const std::greater<T1> cmp)
+{
+ tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ARGMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALAveragePool2d.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALAveragePool2d.h
new file mode 100644
index 000000000..cce30601f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALAveragePool2d.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void AveragePool(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data)
+{
+  // MARK: At this moment this operation is not supported for this type
+  assert(false && "AveragePool NYI");
+  (void)params;
+  (void)input_shape;
+  (void)input_data;
+  (void)output_shape;
+  (void)output_data;
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
+}
+
+template <>
+inline void AveragePool<int8_t>(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape,
+ int8_t *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+
+ tflite::reference_integer_ops::AveragePool(params, input_shape, input_data, output_shape,
+ output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ (void)input_data_type;
+ (void)input_shape;
+ (void)output_shape;
+
+ scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchMatMul.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchMatMul.h
new file mode 100644
index 000000000..3894f2d92
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchMatMul.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHMATMUL_H
+#define LUCI_INTERPRETER_PAL_BATCHMATMUL_H
+
+#include <tensorflow/lite/kernels/internal/reference/batch_matmul.h>
+
+namespace luci_interpreter_pal
+{
+inline void BatchMatMul(const tflite::RuntimeShape &lhs_shape, const float *lhs_data,
+ const tflite::RuntimeShape &rhs_shape, const float *rhs_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::BatchMatMul(lhs_shape, lhs_data, rhs_shape, rhs_data, output_shape,
+ output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *lhs_scratchpad,
+ luci_interpreter::Tensor *rhs_scratchpad,
+ const tflite::RuntimeShape &lhs_shape,
+ const tflite::RuntimeShape &rhs_shape)
+{
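+  // Each scratchpad holds a transposed copy of its operand: batch dims are
+  // kept and the two innermost dims are swapped, e.g. [B, M, K] -> [B, K, M].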
+ // Scratchpad for transposed LHS
+ {
+ auto lhs_rank = lhs_shape.DimensionsCount();
+ luci_interpreter::Shape scratchpad_size(lhs_rank);
+ for (int i = 0; i < lhs_rank - 2; ++i)
+ {
+ scratchpad_size.dim(i) = lhs_shape.Dims(i);
+ }
+ scratchpad_size.dim(lhs_rank - 2) = lhs_shape.Dims(lhs_rank - 1);
+ scratchpad_size.dim(lhs_rank - 1) = lhs_shape.Dims(lhs_rank - 2);
+
+ lhs_scratchpad->resize(scratchpad_size);
+ }
+ // Scratchpad for transposed RHS
+ {
+ auto rhs_rank = rhs_shape.DimensionsCount();
+ luci_interpreter::Shape scratchpad_size(rhs_rank);
+ for (int i = 0; i < rhs_rank - 2; ++i)
+ {
+ scratchpad_size.dim(i) = rhs_shape.Dims(i);
+ }
+ scratchpad_size.dim(rhs_rank - 2) = rhs_shape.Dims(rhs_rank - 1);
+ scratchpad_size.dim(rhs_rank - 1) = rhs_shape.Dims(rhs_rank - 2);
+
+ rhs_scratchpad->resize(scratchpad_size);
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHMATMUL_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchToSpaceND.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchToSpaceND.h
new file mode 100644
index 000000000..3fe2022ed
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALBatchToSpaceND.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::BatchToSpaceND(
+ unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, crops_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALConv2d.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALConv2d.h
new file mode 100644
index 000000000..985a15f39
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALConv2d.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
+#define LUCI_INTERPRETER_PAL_CONV2D_H
+
+#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
+
+#include <memory>
+#include <thread>
+
+namespace luci_interpreter_pal
+{
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &filter_shape,
+ const float *filter_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, const tflite::RuntimeShape &output_shape,
+ float *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ float *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ if (scratchpad_data)
+ {
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+ const int32_t output_height = output_shape.Dims(1);
+ const int32_t output_width = output_shape.Dims(2);
+ const int32_t filter_height = filter_shape.Dims(1);
+ const int32_t filter_width = filter_shape.Dims(2);
+ tflite::RuntimeShape im2col_shape{batches, output_height, output_width,
+ input_depth * filter_height * filter_width};
+
+ tflite::optimized_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, im2col_shape,
+ scratchpad_data);
+ }
+ else
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data,
+ tflite::RuntimeShape(), nullptr);
+}
+
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ uint8 *scratchpad_data)
+{
+ // TODO This should only be done once (although it takes only a few microseconds).
+ // Also, the user should be able to adjust the number of threads.
+ auto gemmlowp_context = std::make_unique<gemmlowp::GemmContext>();
+ gemmlowp_context->set_max_num_threads(static_cast<int>(std::thread::hardware_concurrency()));
+
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
+ scratchpad_data, gemmlowp_context.get());
+}
+
+static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
+ const int32_t *shifts, const tflite::RuntimeShape &input_shape,
+ const int8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ int8 *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ // TODO enable optimized version
+ tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
+ filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::ConvParams &params,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ const int32_t filter_height = filter_shape.Dims(1);
+ const int32_t filter_width = filter_shape.Dims(2);
+
+ // Allocate tensor for scratchpad, if needed.
+ // The checks here should be aligned with the actual implementation.
+ const bool need_dilated_scratchpad =
+ params.dilation_height_factor != 1 || params.dilation_width_factor != 1;
+ const bool need_non_dilated_scratchpad = params.stride_height != 1 || params.stride_width != 1 ||
+ filter_height != 1 || filter_width != 1;
+  const bool need_scratchpad = input_data_type != luci_interpreter::DataType::S16 &&
+                               (need_dilated_scratchpad || need_non_dilated_scratchpad);
+
+  if (need_scratchpad)
+ {
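+    // This is the im2col buffer: one flattened patch of
+    // filter_height * filter_width * input_depth values per output position.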
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int32_t input_depth = tflite::MatchingDim(input_shape, 3, filter_shape, 3);
+ const int32_t output_height = output_shape.Dims(1);
+ const int32_t output_width = output_shape.Dims(2);
+
+ auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type));
+ int32_t scratchpad_size = batches * output_width * output_height * input_depth * filter_height *
+ filter_width * data_type_size;
+ luci_interpreter::Shape scratchpad_shape{scratchpad_size};
+ scratchpad->resize(scratchpad_shape);
+ }
+ else
+ {
+ scratchpad->set_allocatable(false);
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_CONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthToSpace.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthToSpace.h
new file mode 100644
index 000000000..f9ebfcfb5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthToSpace.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::DepthToSpace(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthwiseConv2d.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthwiseConv2d.h
new file mode 100644
index 000000000..c9d1a2948
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALDepthwiseConv2d.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &filter_shape,
+ const T *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
+ T *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ T *scratchpad_data)
+{
+  // MARK: At this moment this operation is not supported for this type
+  assert(false && "DepthwiseConvPerChannel NYI");
+  (void)params;
+  (void)output_multiplier;
+  (void)output_shift;
+  (void)input_shape;
+  (void)input_data;
+  (void)filter_shape;
+  (void)filter_data;
+  (void)bias_shape;
+  (void)bias_data;
+  (void)output_shape;
+  (void)output_data;
+  (void)scratchpad_shape;
+  (void)scratchpad_data;
+}
+
+template <>
+inline void DepthwiseConvPerChannel<int8_t>(
+ const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_integer_ops::DepthwiseConvPerChannel(
+ params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const tflite::DepthwiseParams &params,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ (void)params;
+ (void)input_data_type;
+ (void)input_shape;
+ (void)filter_shape;
+ (void)output_shape;
+
+ scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALDequantize.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALDequantize.h
new file mode 100644
index 000000000..3af6d0777
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALDequantize.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Dequantize(tflite::DequantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALElu.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALElu.h
new file mode 100644
index 000000000..cb365ffd0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALElu.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ELU_H
+#define LUCI_INTERPRETER_PAL_ELU_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::Elu(input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ELU_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALFullyConnected.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALFullyConnected.h
new file mode 100644
index 000000000..62970dbf7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALFullyConnected.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+
+#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void FullyConnected(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &filter_shape, const T *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  // MARK: At this moment this operation is not supported for this type
+  assert(false && "FullyConnected NYI");
+  (void)params;
+  (void)input_shape;
+  (void)input_data;
+  (void)filter_shape;
+  (void)filter_data;
+  (void)bias_shape;
+  (void)bias_data;
+  (void)output_shape;
+  (void)output_data;
+}
+
+template <>
+inline void
+FullyConnected<int8_t>(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+ tflite::reference_integer_ops::FullyConnected(params, input_shape, input_data, filter_shape,
+ filter_data, bias_shape, bias_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALGather.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALGather.h
new file mode 100644
index 000000000..49ac35f93
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALGather.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_GATHER_H
+#define LUCI_INTERPRETER_PAL_GATHER_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T, typename CoordsT = int32>
+static inline void Gather(const tflite::GatherParams &op_params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &coords_shape, const CoordsT *coords_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::Gather(op_params, input_shape, input_data, coords_shape, coords_data,
+ output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_GATHER_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Normalize.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Normalize.h
new file mode 100644
index 000000000..6c663e21f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Normalize.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Normalization(const tflite::L2NormalizationParams &op_params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::L2Normalization(op_params, input_shape, input_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Pool2D.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Pool2D.h
new file mode 100644
index 000000000..aac57f2b2
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALL2Pool2D.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H
+#define LUCI_INTERPRETER_PAL_L2POOL2D_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Pool(const tflite::PoolParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::optimized_ops::L2Pool(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALLeakyRelu.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALLeakyRelu.h
new file mode 100644
index 000000000..e8209bae6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALLeakyRelu.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H
+#define LUCI_INTERPRETER_PAL_LEAKYRELU_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void LeakyRelu(const tflite::LeakyReluParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALLocalResponseNormalization.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALLocalResponseNormalization.h
new file mode 100644
index 000000000..54f7f0916
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALLocalResponseNormalization.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H
+#define LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void
+LocalResponseNormalization(const tflite::LocalResponseNormalizationParams &op_params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::LocalResponseNormalization(op_params, input_shape, input_data,
+ output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LOCALRESPONSENORMALIZATION_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALLogSoftmax.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALLogSoftmax.h
new file mode 100644
index 000000000..a32e3eec6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALLogSoftmax.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LOGSOFTMAX_H
+#define LUCI_INTERPRETER_PAL_LOGSOFTMAX_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
+ float beta)
+{
+ tflite::optimized_ops::PopulateSoftmaxLookupTable(data, input_scale, beta);
+}
+
+static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
+{
+  // Do nothing for Linux.
+ (void)params;
+ (void)input_scale;
+ (void)beta;
+}
+
+static inline void LogSoftmax(const tflite::SoftmaxParams &params, float input_scale,
+ const tflite::RuntimeShape &input_shape, const uint8 *input_data,
+ const tflite::RuntimeShape &output_shape, uint8 *output_data)
+{
+ tflite::optimized_ops::LogSoftmax(params, input_scale, input_shape, input_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LOGSOFTMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALMul.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALMul.h
new file mode 100644
index 000000000..a8a9d4abc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALMul.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_MUL_H
+#define LUCI_INTERPRETER_PAL_MUL_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::optimized_ops::Mul(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
+}
+
+template <>
+inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const int64_t *input1_data, const tflite::RuntimeShape &input2_shape,
+ const int64_t *input2_data, const tflite::RuntimeShape &output_shape,
+ int64_t *output_data)
+{
+ tflite::optimized_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void
+BroadcastMul4DSlow(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_MUL_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALNeg.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALNeg.h
new file mode 100644
index 000000000..797ffee1b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALNeg.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_NEG_H
+#define LUCI_INTERPRETER_PAL_NEG_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_NEG_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALQuantize.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALQuantize.h
new file mode 100644
index 000000000..bf1d7954e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALQuantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
+#define LUCI_INTERPRETER_PAL_QUANTIZE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Quantize(tflite::QuantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+template <typename Input, typename Output>
+static inline void Requantize(const Input *input_data, int32_t size,
+ int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+ int32_t input_zero_point, int32_t output_zero_point,
+ Output *output_data)
+{
+ tflite::optimized_ops::Requantize(input_data, size, effective_scale_multiplier,
+ effective_scale_shift, input_zero_point, output_zero_point,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
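
Note: AffineQuantize implements the usual affine mapping round(value / scale) + zero_point, clamped to the output type's range. A minimal sketch with illustrative parameters:

#include <cstdint>

#include "PALQuantize.h" // assumption: pal/linux on the include path

int main()
{
  tflite::QuantizationParams params{};
  params.zero_point = 0;
  params.scale = 0.5;

  const float input[2] = {1.0f, -2.0f};
  int8_t output[2] = {};
  tflite::RuntimeShape shape({1, 1, 1, 2});

  // round(value / 0.5) + 0, clamped to [-128, 127]: output becomes {2, -4}.
  luci_interpreter_pal::Quantize(params, shape, input, shape, output);
  return 0;
}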
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu.h
new file mode 100644
index 000000000..b4c715d3e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RELU_H
+#define LUCI_INTERPRETER_PAL_RELU_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Relu(const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::Relu(input_shape, input_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void ReluX(const tflite::ReluParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::optimized_ops::ReluX(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RELU_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu6.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu6.h
new file mode 100644
index 000000000..bf2f91aa5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALRelu6.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RELU6_H
+#define LUCI_INTERPRETER_PAL_RELU6_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Relu6(const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::optimized_ops::Relu6(input_shape, input_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void ReluX(const tflite::ReluParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::optimized_ops::ReluX(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RELU6_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeBilinear.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeBilinear.h
new file mode 100644
index 000000000..7380081dc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeBilinear.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+
+#include <tensorflow/lite/kernels/internal/optimized/resize_bilinear.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeBilinear(const tflite::ResizeBilinearParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::ResizeBilinear(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h
new file mode 100644
index 000000000..74d19265b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALResizeNearestNeighbor.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSVDF.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSVDF.h
new file mode 100644
index 000000000..0ffba14f0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSVDF.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <tensorflow/lite/kernels/internal/reference/svdf.h>
+
+namespace luci_interpreter_pal
+{
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, int16_t *activation_state_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+ int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+ int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+ tflite::reference_ops::EvalIntegerSVDF(&params, input_shape, input_data, weight_feature_shape,
+ weight_feature_data, weight_time_shape, weight_time_data,
+ bias_shape, bias_data, activation_state_data, output_shape,
+ output_data, scratchpad_data, output_temp_data, scale_1_a,
+ scale_1_b, scale_2_a, scale_2_b, input_zp, output_zp);
+}
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, float *scratchpad_data, float *activation_state_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::EvalFloatSVDF(&params, input_shape, input_data, weight_feature_shape,
+ weight_feature_data, weight_time_shape, weight_time_data,
+ bias_shape, bias_data, scratchpad_data,
+ activation_state_data, output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(
+ const luci_interpreter::DataType &input_data_type,
+ const luci_interpreter::DataType &weight_feature_data_type,
+ luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+ luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+ luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+ const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+ const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+
+ if (input_data_type == loco::DataType::FLOAT32 &&
+ (weight_feature_data_type == loco::DataType::S8 ||
+ weight_feature_data_type == loco::DataType::U8))
+ {
+ (void)input_shape;
+ (void)weight_time_shape;
+ (void)scratchpad_3;
+ (void)scratchpad_4;
+ (void)scratchpad_5;
+ (void)scratchpad_6;
+
+ throw std::runtime_error("Hybrid type is not currently supported for linux platform");
+ }
+
+ // Resize scratchpad_1 tensor
+ scratchpad_1->resize({batch_size, num_filters});
+
+ if (input_data_type == loco::DataType::S8)
+ {
+ // Resize scratchpad_2 for full_integer op
+ scratchpad_2->resize({batch_size, num_units});
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSlice.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSlice.h
new file mode 100644
index 000000000..640a71684
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSlice.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SLICE_H
+#define LUCI_INTERPRETER_PAL_SLICE_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Slice(const tflite::SliceParams &op_params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::Slice(op_params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SLICE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSoftmax.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSoftmax.h
new file mode 100644
index 000000000..b197e79d1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSoftmax.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H
+#define LUCI_INTERPRETER_PAL_SOFTMAX_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
+ float beta)
+{
+ tflite::optimized_ops::PopulateSoftmaxLookupTable(data, input_scale, beta);
+}
+
+static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
+{
+ // Do nothing for linux
+ (void)params;
+ (void)input_scale;
+ (void)beta;
+}
+
+template <typename In, typename Out>
+static inline void Softmax(const tflite::SoftmaxParams &params,
+ const tflite::RuntimeShape &input_shape, const In *input_data,
+ const tflite::RuntimeShape &output_shape, Out *output_data)
+{
+ tflite::optimized_ops::Softmax(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H
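
Note: on this backend InitializeParams is deliberately a no-op (other PALs presumably prepare backend-specific state there), while the quantized path relies on PopulateSoftmaxLookupTable. A float-only usage sketch with illustrative values:

#include "PALSoftmax.h" // assumption: pal/linux on the include path

int main()
{
  tflite::SoftmaxParams params{};
  params.beta = 1.0;

  const float input[3] = {1.f, 2.f, 3.f};
  float output[3] = {};
  tflite::RuntimeShape shape({1, 3});

  // Softmax over the last dimension; each row of output sums to 1.
  luci_interpreter_pal::Softmax(params, shape, input, shape, output);
  return 0;
}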
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToBatchND.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToBatchND.h
new file mode 100644
index 000000000..5e8de9ba3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToBatchND.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+SpaceToBatchND(const tflite::SpaceToBatchParams &params,
+ const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::SpaceToBatchND(
+ params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, paddings_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToDepth.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToDepth.h
new file mode 100644
index 000000000..52d2a5bb1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSpaceToDepth.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::optimized_ops::SpaceToDepth(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSplit.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSplit.h
new file mode 100644
index 000000000..4d8da72d8
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSplit.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPLIT_H
+#define LUCI_INTERPRETER_PAL_SPLIT_H
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename Scalar>
+static inline void Split(const tflite::SplitParams &params, const tflite::RuntimeShape &input_shape,
+ const Scalar *input_data, const tflite::RuntimeShape *const *output_shapes,
+ Scalar *const *output_data)
+{
+ tflite::optimized_ops::Split(params, input_shape, input_data, output_shapes, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPLIT_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/PALSub.h b/compiler/luci-micro/luci-interpreter/pal/linux/PALSub.h
new file mode 100644
index 000000000..04080d619
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/PALSub.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SUB_H
+#define LUCI_INTERPRETER_PAL_SUB_H
+
+#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Sub(const tflite::ArithmeticParams &params,
+ const tflite::RuntimeShape &input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &input2_shape, const T *input2_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::optimized_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SUB_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/linux/pal.cmake b/compiler/luci-micro/luci-interpreter/pal/linux/pal.cmake
new file mode 100644
index 000000000..185700cf9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/linux/pal.cmake
@@ -0,0 +1,82 @@
+macro(initialize_pal)
+ nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET)
+
+ if (NOT TensorFlowSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: TensorFlow not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowGEMMLowpSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: gemmlowp not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowEigenSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Eigen not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowRuySource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Ruy not found")
+ return()
+ endif ()
+
+ find_package(Threads REQUIRED)
+
+ set(PAL_INITIALIZED TRUE)
+endmacro()
+
+macro(add_pal_to_target TGT)
+ target_include_directories(${TGT} PRIVATE "${PAL}")
+ target_include_directories(${TGT} SYSTEM PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}")
+ target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
+
+ # TODO Revisit this; instead of compiling these support sources here,
+ # add the sources together with the visitors in this library.
+ set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc)
+
+ if(BUILD_ARM32_NEON)
+ # NOTE may need to revise this list for version upgrade
+ set(PAL_SOURCES ${PAL_SOURCES}
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/optimized/cpu_check.cc
+ ${TensorFlowRuySource_DIR}/ruy/allocator.cc
+ ${TensorFlowRuySource_DIR}/ruy/block_map.cc
+ ${TensorFlowRuySource_DIR}/ruy/blocking_counter.cc
+ ${TensorFlowRuySource_DIR}/ruy/context_get_ctx.cc
+ ${TensorFlowRuySource_DIR}/ruy/cpuinfo.cc
+ ${TensorFlowRuySource_DIR}/ruy/ctx.cc
+ ${TensorFlowRuySource_DIR}/ruy/denormal.cc
+ ${TensorFlowRuySource_DIR}/ruy/frontend.cc
+ ${TensorFlowRuySource_DIR}/ruy/pack_arm.cc
+ ${TensorFlowRuySource_DIR}/ruy/prepacked_cache.cc
+ ${TensorFlowRuySource_DIR}/ruy/prepare_packed_matrices.cc
+ ${TensorFlowRuySource_DIR}/ruy/system_aligned_alloc.cc
+ ${TensorFlowRuySource_DIR}/ruy/thread_pool.cc
+ ${TensorFlowRuySource_DIR}/ruy/trmul.cc
+ ${TensorFlowRuySource_DIR}/ruy/tune.cc
+ ${TensorFlowRuySource_DIR}/ruy/wait.cc
+ ${TensorFlowRuySource_DIR}/ruy/kernel_arm32.cc
+ )
+ endif(BUILD_ARM32_NEON)
+
+ add_library(luci_interpreter_linux_pal STATIC ${PAL_SOURCES})
+ set_target_properties(luci_interpreter_linux_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
+ target_include_directories(luci_interpreter_linux_pal SYSTEM PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}"
+ )
+
+ target_link_libraries(${TGT} PRIVATE Threads::Threads luci_interpreter_linux_pal)
+endmacro()
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst b/compiler/luci-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst
new file mode 100644
index 000000000..f0df58db3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/KernelsToBuild.lst
@@ -0,0 +1,62 @@
+REGISTER_KERNEL(Add)
+REGISTER_KERNEL(ArgMax)
+REGISTER_KERNEL(AveragePool2D)
+REGISTER_KERNEL(BatchToSpaceND)
+REGISTER_KERNEL(Cast)
+REGISTER_KERNEL(Concatenation)
+REGISTER_KERNEL(Conv2D)
+REGISTER_KERNEL(DepthToSpace)
+REGISTER_KERNEL(DepthwiseConv2D)
+REGISTER_KERNEL(Dequantize)
+REGISTER_KERNEL(Div)
+REGISTER_KERNEL(Elu)
+REGISTER_KERNEL(Exp)
+REGISTER_KERNEL(ExpandDims)
+REGISTER_KERNEL(Fill)
+REGISTER_KERNEL(Floor)
+REGISTER_KERNEL(FloorDiv)
+REGISTER_KERNEL(Equal)
+REGISTER_KERNEL(FullyConnected)
+REGISTER_KERNEL(Greater)
+REGISTER_KERNEL(GreaterEqual)
+REGISTER_KERNEL(If)
+REGISTER_KERNEL(InstanceNorm)
+REGISTER_KERNEL(L2Normalize)
+REGISTER_KERNEL(L2Pool2D)
+REGISTER_KERNEL(LeakyRelu)
+REGISTER_KERNEL(Less)
+REGISTER_KERNEL(LessEqual)
+REGISTER_KERNEL(LogicalAnd)
+REGISTER_KERNEL(LogicalNot)
+REGISTER_KERNEL(LogicalOr)
+REGISTER_KERNEL(Logistic)
+REGISTER_KERNEL(Maximum)
+REGISTER_KERNEL(MaxPool2D)
+REGISTER_KERNEL(Minimum)
+REGISTER_KERNEL(MirrorPad)
+REGISTER_KERNEL(Mul)
+REGISTER_KERNEL(Neg)
+REGISTER_KERNEL(NotEqual)
+REGISTER_KERNEL(Pad)
+REGISTER_KERNEL(PadV2)
+REGISTER_KERNEL(PRelu)
+REGISTER_KERNEL(Quantize)
+REGISTER_KERNEL(Reshape)
+REGISTER_KERNEL(ResizeBilinear)
+REGISTER_KERNEL(ResizeNearestNeighbor)
+REGISTER_KERNEL(Rsqrt)
+REGISTER_KERNEL(Shape)
+REGISTER_KERNEL(Softmax)
+REGISTER_KERNEL(SpaceToBatchND)
+REGISTER_KERNEL(SpaceToDepth)
+REGISTER_KERNEL(StridedSlice)
+REGISTER_KERNEL(Sqrt)
+REGISTER_KERNEL(Square)
+REGISTER_KERNEL(SquaredDifference)
+REGISTER_KERNEL(Squeeze)
+REGISTER_KERNEL(Sub)
+REGISTER_KERNEL(SVDF)
+REGISTER_KERNEL(Tanh)
+REGISTER_KERNEL(Transpose)
+REGISTER_KERNEL(TransposeConv)
+REGISTER_KERNEL(While)
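
Note: lists like this are normally consumed with an X-macro: the including translation unit defines REGISTER_KERNEL and then textually includes the .lst file. A hedged sketch of that pattern (the consumer below is illustrative, not part of this diff):

#include <cstdio>

// Expand each REGISTER_KERNEL(name) entry into a string literal.
#define REGISTER_KERNEL(name) #name,
static const char *kKernelNames[] = {
#include "KernelsToBuild.lst" // assumption: pal/mcu on the include path
};
#undef REGISTER_KERNEL

int main()
{
  for (const char *name : kKernelNames)
    std::printf("%s\n", name); // prints Add, ArgMax, AveragePool2D, ...
  return 0;
}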
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALArgMax.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALArgMax.h
new file mode 100644
index 000000000..21e63296d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALArgMax.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ARGMAX_H
+#define LUCI_INTERPRETER_PAL_ARGMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/arg_min_max.h>
+
+#include <functional> // for std::greater, used in the ArgMinMax signature below
+
+namespace luci_interpreter_pal
+{
+template <typename T1, typename T2, typename T3>
+static inline void ArgMinMax(const tflite::RuntimeShape &input1_shape, const T1 *input1_data,
+ const T2 *axis, const tflite::RuntimeShape &output_shape,
+ T3 *output_data, const std::greater<T1> cmp)
+{
+ tflite::reference_ops::ArgMinMax(input1_shape, input1_data, axis, output_shape, output_data, cmp);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ARGMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALAveragePool2d.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALAveragePool2d.h
new file mode 100644
index 000000000..cce30601f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALAveragePool2d.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+#define LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void AveragePool(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, T *scratchpad_data)
+{
+ {
+ // MARK: At this moment this operation is not supported
+ assert(false && "AveragePool NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ }
+}
+
+template <>
+inline void AveragePool<int8_t>(const tflite::PoolParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape,
+ int8_t *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+
+ tflite::reference_integer_ops::AveragePool(params, input_shape, input_data, output_shape,
+ output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ (void)input_data_type;
+ (void)input_shape;
+ (void)output_shape;
+
+ scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_AVERAGEPOOL2D_H
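
Note: only the int8_t specialization above is backed by a real kernel; the generic template traps via assert, and SetupScratchpadTensor marks the scratchpad non-allocatable because the reference kernel needs none. A minimal sketch of the int8 path, illustrative values:

#include <cstdint>

#include "PALAveragePool2d.h" // assumption: pal/mcu on the include path

int main()
{
  // Average a 1x2x2x1 int8 input with a 2x2 filter into a 1x1x1x1 output.
  tflite::PoolParams params{};
  params.stride_height = params.stride_width = 1;
  params.filter_height = params.filter_width = 2;
  params.quantized_activation_min = -128;
  params.quantized_activation_max = 127;

  const int8_t input[4] = {8, 12, 16, 20};
  int8_t output[1] = {};
  tflite::RuntimeShape in_shape({1, 2, 2, 1});
  tflite::RuntimeShape out_shape({1, 1, 1, 1});

  // Resolves to the int8_t specialization; output[0] becomes 14.
  luci_interpreter_pal::AveragePool<int8_t>(params, in_shape, input, out_shape, output,
                                            tflite::RuntimeShape(), nullptr);
  return 0;
}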
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALBatchToSpaceND.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALBatchToSpaceND.h
new file mode 100644
index 000000000..4dd77ffdc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALBatchToSpaceND.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
+
+#include <tensorflow/lite/kernels/internal/reference/batch_to_space_nd.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+BatchToSpaceND(const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *crops_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::BatchToSpaceND(
+ unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, crops_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_BATCHTOSPACEND_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALConv2d.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALConv2d.h
new file mode 100644
index 000000000..13976877a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALConv2d.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_CONV2D_H
+#define LUCI_INTERPRETER_PAL_CONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/conv.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/conv.h>
+
+namespace luci_interpreter_pal
+{
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &filter_shape,
+ const float *filter_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, const tflite::RuntimeShape &output_shape,
+ float *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ float *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data,
+ tflite::RuntimeShape(), nullptr);
+}
+
+static inline void Conv(const tflite::ConvParams &params, const tflite::RuntimeShape &input_shape,
+ const uint8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const uint8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ uint8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ uint8 *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_ops::Conv(params, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data, scratchpad_shape,
+ scratchpad_data, nullptr);
+}
+
+static inline void ConvPerChannel(const tflite::ConvParams &params, const int32_t *mult,
+ const int32_t *shifts, const tflite::RuntimeShape &input_shape,
+ const int8 *input_data, const tflite::RuntimeShape &filter_shape,
+ const int8 *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32 *bias_data, const tflite::RuntimeShape &output_shape,
+ int8 *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ int8 *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_integer_ops::ConvPerChannel(params, mult, shifts, input_shape, input_data,
+ filter_shape, filter_data, bias_shape, bias_data,
+ output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::ConvParams &params,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ (void)input_data_type;
+ (void)params;
+ (void)input_shape;
+ (void)filter_shape;
+ (void)output_shape;
+ scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_CONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthToSpace.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthToSpace.h
new file mode 100644
index 000000000..8463e571e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthToSpace.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+#define LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
+
+#include <tensorflow/lite/kernels/internal/reference/depth_to_space.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void DepthToSpace(const tflite::DepthToSpaceParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::DepthToSpace(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHTOSPACE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h
new file mode 100644
index 000000000..c9d1a2948
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+DepthwiseConvPerChannel(const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &filter_shape,
+ const T *filter_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, const tflite::RuntimeShape &output_shape,
+ T *output_data, const tflite::RuntimeShape &scratchpad_shape,
+ T *scratchpad_data)
+{
+ {
+ // MARK: At this moment this operation is not supported
+ assert(false && "DepthwiseConvPerChannel NYI");
+ (void)params;
+ (void)output_multiplier;
+ (void)output_shift;
+ (void)input_shape;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ }
+}
+
+template <>
+inline void DepthwiseConvPerChannel<int8_t>(
+ const tflite::DepthwiseParams &params, const int32_t *output_multiplier,
+ const int32_t *output_shift, const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data,
+ const tflite::RuntimeShape &scratchpad_shape, int8_t *scratchpad_data)
+{
+ (void)scratchpad_shape;
+ (void)scratchpad_data;
+ tflite::reference_integer_ops::DepthwiseConvPerChannel(
+ params, output_multiplier, output_shift, input_shape, input_data, filter_shape, filter_data,
+ bias_shape, bias_data, output_shape, output_data);
+}
+
+static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
+ const tflite::DepthwiseParams &params,
+ const luci_interpreter::DataType &input_data_type,
+ const tflite::RuntimeShape &input_shape,
+ const tflite::RuntimeShape &filter_shape,
+ const tflite::RuntimeShape &output_shape)
+{
+ (void)params;
+ (void)input_data_type;
+ (void)input_shape;
+ (void)filter_shape;
+ (void)output_shape;
+
+ scratchpad->set_allocatable(false);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEPTHWISECONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALDequantize.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDequantize.h
new file mode 100644
index 000000000..15ff0327b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALDequantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+#define LUCI_INTERPRETER_PAL_DEQUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter_pal
+{
+
+template <typename T>
+static inline void Dequantize(tflite::DequantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_integer_ops::Dequantize<T>(params, input_shape, input_data, output_shape,
+ output_data);
+}
+
+static inline void Dequantize(tflite::DequantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const uint8_t *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::Dequantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_DEQUANTIZE_H
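
Note: overload resolution picks the plain uint8_t function over the template for uint8 inputs, so uint8 goes through reference_ops while the narrower integer types take the integer_ops path. A minimal sketch with illustrative parameters:

#include <cstdint>

#include "PALDequantize.h" // assumption: pal/mcu on the include path

int main()
{
  tflite::DequantizationParams params{};
  params.zero_point = 128;
  params.scale = 0.5;

  const uint8_t input[2] = {130, 126};
  float output[2] = {};
  tflite::RuntimeShape shape({1, 1, 1, 2});

  // (value - 128) * 0.5: output becomes {1.0f, -1.0f}.
  luci_interpreter_pal::Dequantize(params, shape, input, shape, output);
  return 0;
}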
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALElu.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALElu.h
new file mode 100644
index 000000000..4089d0a0c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALElu.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_ELU_H
+#define LUCI_INTERPRETER_PAL_ELU_H
+
+#include <tensorflow/lite/kernels/internal/reference/elu.h>
+
+namespace luci_interpreter_pal
+{
+
+static inline void Elu(const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::Elu(input_shape, input_data, output_shape, output_data);
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_ELU_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALFullyConnected.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALFullyConnected.h
new file mode 100644
index 000000000..048624d74
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALFullyConnected.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
+
+#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void FullyConnected(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &filter_shape, const T *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ {
+ // MARK: At this moment this operation is not supported
+ assert(false && "FullyConnected NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)filter_shape;
+ (void)filter_data;
+ (void)bias_shape;
+ (void)bias_data;
+ (void)output_shape;
+ (void)output_data;
+ }
+}
+
+template <>
+inline void
+FullyConnected<int8_t>(const tflite::FullyConnectedParams &params,
+ const tflite::RuntimeShape &input_shape, const int8_t *input_data,
+ const tflite::RuntimeShape &filter_shape, const int8_t *filter_data,
+ const tflite::RuntimeShape &bias_shape, const int32_t *bias_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data)
+{
+ tflite::reference_integer_ops::FullyConnected(params, input_shape, input_data, filter_shape,
+ filter_data, bias_shape, bias_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_FULLYCONNECTED_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Normalize.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Normalize.h
new file mode 100644
index 000000000..f84742a44
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Normalize.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+#define LUCI_INTERPRETER_PAL_L2NORMALIZE_H
+
+#include <tensorflow/lite/kernels/internal/reference/l2normalization.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Normalization(const tflite::L2NormalizationParams &op_params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::L2Normalization(op_params, input_shape, input_data, output_shape,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2NORMALIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Pool2D.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Pool2D.h
new file mode 100644
index 000000000..38a302fc6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALL2Pool2D.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_L2POOL2D_H
+#define LUCI_INTERPRETER_PAL_L2POOL2D_H
+
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void L2Pool(const tflite::PoolParams &params, const tflite::RuntimeShape &input_shape,
+ const T *input_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::reference_ops::L2Pool(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_L2POOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALLeakyRelu.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALLeakyRelu.h
new file mode 100644
index 000000000..9ccd2224f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALLeakyRelu.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_LEAKYRELU_H
+#define LUCI_INTERPRETER_PAL_LEAKYRELU_H
+
+#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h>
+
+namespace luci_interpreter_pal
+{
+static inline void LeakyRelu(const tflite::LeakyReluParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ tflite::reference_ops::LeakyRelu(params, input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_LEAKYRELU_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALMul.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALMul.h
new file mode 100644
index 000000000..347a97a83
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALMul.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_MUL_H
+#define LUCI_INTERPRETER_PAL_MUL_H
+
+#include <tensorflow/lite/kernels/internal/reference/mul.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Mul(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape,
+ T *output_data)
+{
+ tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+
+template <typename T>
+static inline void
+BroadcastMul4DSlow(tflite::ArithmeticParams &params, const tflite::RuntimeShape &input1_shape,
+ const T *input1_data, const tflite::RuntimeShape &input2_shape,
+ const T *input2_data, const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+ input2_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_MUL_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALNeg.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALNeg.h
new file mode 100644
index 000000000..be5903a0c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALNeg.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_NEG_H
+#define LUCI_INTERPRETER_PAL_NEG_H
+
+#include <tensorflow/lite/kernels/internal/reference/neg.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Negate(const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::Negate(input_shape, input_data, output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_NEG_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALQuantize.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALQuantize.h
new file mode 100644
index 000000000..6046789ae
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALQuantize.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_QUANTIZE_H
+#define LUCI_INTERPRETER_PAL_QUANTIZE_H
+
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Quantize(tflite::QuantizationParams &params,
+ const tflite::RuntimeShape &input_shape, const float *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::AffineQuantize(params, input_shape, input_data, output_shape, output_data);
+}
+
+template <typename Input, typename Output>
+static inline void Requantize(const Input *input_data, int32_t size,
+ int32_t effective_scale_multiplier, int32_t effective_scale_shift,
+ int32_t input_zero_point, int32_t output_zero_point,
+ Output *output_data)
+{
+ tflite::reference_ops::Requantize(input_data, size, effective_scale_multiplier,
+ effective_scale_shift, input_zero_point, output_zero_point,
+ output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_QUANTIZE_H
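Semantics note: AffineQuantize maps each float x to clamp(round(x / scale) + zero_point, type_min, type_max) for the target integer type, and Requantize rescales an already-quantized buffer through the given fixed-point multiplier/shift pair. A minimal sketch of the quantize path (values illustrative, not part of the patch):

    #include "PALQuantize.h"

    void quantize_example()
    {
      tflite::QuantizationParams params;
      params.scale = 0.5; // one quantized step corresponds to 0.5 in float
      params.zero_point = 0;
      const tflite::RuntimeShape shape({1, 1, 1, 2});
      const float input[2] = {1.0f, -2.0f};
      int8_t output[2] = {};
      luci_interpreter_pal::Quantize(params, shape, input, shape, output);
      // output is now {2, -4}
    }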
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeBilinear.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeBilinear.h
new file mode 100644
index 000000000..cc9f0fd54
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeBilinear.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+#define LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_bilinear.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeBilinear(const tflite::ResizeBilinearParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::ResizeBilinear(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZEBILINEAR_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h
new file mode 100644
index 000000000..f4d5a6ed3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALResizeNearestNeighbor.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
+
+#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+ResizeNearestNeighbor(const tflite::ResizeNearestNeighborParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_size_shape, const int32 *output_size_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::ResizeNearestNeighbor(op_params, unextended_input_shape, input_data,
+ output_size_shape, output_size_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_RESIZENEARESTNEIGHBOR_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSVDF.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSVDF.h
new file mode 100644
index 000000000..3bba668fb
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSVDF.h
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SVDF_H
+#define LUCI_INTERPRETER_PAL_SVDF_H
+
+#include <tensorflow/lite/kernels/internal/reference/svdf.h>
+
+namespace luci_interpreter_pal
+{
+static inline void
+IntegerSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const int8_t *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const int8_t *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const int16_t *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const int32_t *bias_data, int16_t *activation_state_data,
+ const tflite::RuntimeShape &output_shape, int8_t *output_data, int32_t *scratchpad_data,
+ int32_t *output_temp_data, int32_t scale_1_a, int scale_1_b, int32_t scale_2_a,
+ int scale_2_b, int32_t input_zp, int32_t output_zp)
+{
+ const int n_rank = params.rank;
+ const int n_batch = input_shape.Dims(0);
+ const int n_input = input_shape.Dims(1);
+ const int n_filter = weight_feature_shape.Dims(0);
+ const int n_unit = n_filter / n_rank;
+ const int n_memory = weight_time_shape.Dims(1);
+
+ // Left shift the activation_state.
+ {
+ int16_t *new_state_start = activation_state_data;
+ const int16_t *old_state_start = activation_state_data + 1;
+ const int16_t *old_state_end = activation_state_data + n_batch * n_filter * n_memory;
+ while (old_state_start != old_state_end)
+ {
+ *new_state_start++ = *old_state_start++;
+ }
+ }
+
+  // Note: no need to clear the latest activation, since the matmul below
+  // overwrites it rather than accumulating into it.
+
+ // Feature matmul.
+ {
+ const int32_t output_max = std::numeric_limits<int16_t>::max();
+ const int32_t output_min = std::numeric_limits<int16_t>::min();
+ int16_t *result_in_batch = activation_state_data + (n_memory - 1);
+ for (int b = 0; b < n_batch; b++)
+ {
+ const int8_t *matrix_ptr = weight_feature_data;
+ for (int r = 0; r < n_filter; r++)
+ {
+ int32_t dot_prod = 0;
+ const int8_t *vector_in_batch = input_data + b * n_input;
+ for (int c = 0; c < n_input; c++)
+ {
+ dot_prod += *matrix_ptr++ * (*vector_in_batch++ - input_zp);
+ }
+ dot_prod = tflite::MultiplyByQuantizedMultiplier(dot_prod, scale_1_a, scale_1_b);
+ dot_prod = std::min(std::max(output_min, dot_prod), output_max);
+        // This assumes the state is symmetrically quantized. Otherwise the
+        // last entry of the state would have to be initialized to its zero
+        // point and the dot product accumulated into it, i.e. equivalent to:
+        //   result_in_batch = zero_point; // which happens to be zero
+        //   result_in_batch += dot_prod;
+ *result_in_batch = dot_prod;
+ result_in_batch += n_memory;
+ }
+ }
+ }
+
+ // Time.
+ {
+ for (int b = 0; b < n_batch; ++b)
+ {
+ int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter;
+
+ // Perform batched vector dot product:
+ const int16_t *vector1_ptr = weight_time_data;
+ const int16_t *vector2_ptr = activation_state_data + b * n_memory * n_filter;
+
+ for (int i = 0; i < n_filter; i++)
+ {
+ *scratch_ptr_batch = 0;
+ for (int j = 0; j < n_memory; j++)
+ {
+ *scratch_ptr_batch += *vector1_ptr++ * *vector2_ptr++;
+ }
+ scratch_ptr_batch++;
+ }
+ }
+ }
+
+ // Reduce, add bias, rescale, activation.
+ {
+ // Add bias.
+ if (bias_data)
+ {
+ // Vector batch assign:
+ for (int i = 0; i < n_batch; ++i)
+ {
+ int32_t *output_ptr = output_temp_data + i * n_unit;
+ const int32_t *bias_ptr = bias_data;
+ for (int j = 0; j < n_unit; ++j)
+ {
+ *output_ptr++ = *bias_ptr++;
+ }
+ }
+ }
+ else
+ {
+ int32_t *output_ptr = output_temp_data;
+ for (int i = 0; i < n_batch * n_unit; ++i)
+ {
+ *output_ptr++ = 0;
+ }
+ }
+
+ // Reduce.
+ for (int b = 0; b < n_batch; ++b)
+ {
+ int32_t *output_temp_ptr = output_temp_data + b * n_unit;
+ int32_t *scratch_ptr_batch = scratchpad_data + b * n_filter;
+
+ // Reduction sum vector
+ for (int i = 0; i < n_unit; ++i)
+ {
+ for (int j = 0; j < n_rank; ++j)
+ {
+ output_temp_ptr[i] += *scratch_ptr_batch++;
+ }
+ }
+ }
+
+ // Rescale.
+ const int32_t output_max = std::numeric_limits<int8_t>::max();
+ const int32_t output_min = std::numeric_limits<int8_t>::min();
+ for (int i = 0; i < n_batch * n_unit; ++i)
+ {
+ int32_t x1 = output_temp_data[i];
+ int32_t x2 = tflite::MultiplyByQuantizedMultiplier(x1, scale_2_a, scale_2_b);
+ int32_t x3 = x2 + output_zp;
+ int32_t x4 = std::min(std::max(output_min, x3), output_max);
+ output_data[i] = static_cast<int8_t>(x4);
+ }
+ }
+}
+
+static inline void
+FloatSVDF(const TfLiteSVDFParams &params, const tflite::RuntimeShape &input_shape,
+ const float *input_data, const tflite::RuntimeShape &weight_feature_shape,
+ const float *weight_feature_data, const tflite::RuntimeShape &weight_time_shape,
+ const float *weight_time_data, const tflite::RuntimeShape &bias_shape,
+ const float *bias_data, float *scratchpad_data, float *activation_state_data,
+ const tflite::RuntimeShape &output_shape, float *output_data)
+{
+ const int32_t rank = params.rank;
+ const int32_t batch_size = input_shape.Dims(0);
+ const int32_t input_size = input_shape.Dims(1);
+ const int32_t num_filters = weight_feature_shape.Dims(0);
+ const int32_t num_units = num_filters / rank;
+ const int32_t memory_size = weight_time_shape.Dims(1);
+
+ // Left shift the activation_state.
+ {
+ float *new_state_start = activation_state_data;
+ const float *old_state_start = activation_state_data + 1;
+ const float *old_state_end = activation_state_data + batch_size * num_filters * memory_size;
+ while (old_state_start != old_state_end)
+ {
+ *new_state_start++ = *old_state_start++;
+ }
+ }
+
+  // Note: no need to clear the latest activation, since the matmul below
+  // overwrites it rather than accumulating into it.
+
+ // Compute conv1d(inputs, weights_feature).
+ // The activation_state's rightmost column is used to save current cycle
+ // activation. This is achieved by starting at state_ptr[memory_size - 1] and
+ // having the stride equal to memory_size.
+
+ // Perform batched matrix vector multiply operation:
+ {
+ const float *matrix = weight_feature_data;
+ const float *vector = input_data;
+ float *result = &activation_state_data[memory_size - 1];
+ float *result_in_batch = result;
+ for (int i = 0; i < batch_size; ++i)
+ {
+ const float *matrix_ptr = matrix;
+ for (int j = 0; j < num_filters; ++j)
+ {
+ float dot_prod = 0.0f;
+ const float *vector_in_batch = vector + i * input_size;
+ for (int k = 0; k < input_size; ++k)
+ {
+ dot_prod += *matrix_ptr++ * *vector_in_batch++;
+ }
+ *result_in_batch = dot_prod;
+ result_in_batch += memory_size;
+ }
+ }
+ }
+
+ tflite::reference_ops::ApplyTimeWeightsBiasAndActivation(
+ batch_size, memory_size, num_filters, num_units, rank, weight_time_data, bias_data,
+ params.activation, activation_state_data, scratchpad_data, output_data);
+}
+
+static inline void SetupScratchpadTensor(
+ const luci_interpreter::DataType &input_data_type,
+ const luci_interpreter::DataType &weight_feature_data_type,
+ luci_interpreter::Tensor *scratchpad_1, luci_interpreter::Tensor *scratchpad_2,
+ luci_interpreter::Tensor *scratchpad_3, luci_interpreter::Tensor *scratchpad_4,
+ luci_interpreter::Tensor *scratchpad_5, luci_interpreter::Tensor *scratchpad_6,
+ const luci_interpreter::Shape input_shape, const luci_interpreter::Shape weight_time_shape,
+ const int32_t batch_size, const int32_t num_filters, const int32_t num_units)
+{
+ if (input_data_type == loco::DataType::FLOAT32 &&
+ (weight_feature_data_type == loco::DataType::S8 ||
+ weight_feature_data_type == loco::DataType::U8))
+ {
+ (void)input_shape;
+ (void)weight_time_shape;
+ (void)scratchpad_3;
+ (void)scratchpad_4;
+ (void)scratchpad_5;
+ (void)scratchpad_6;
+
+    throw std::runtime_error("Hybrid type is not currently supported for the mcu platform");
+ }
+
+ // Resize scratchpad_1 tensor
+ scratchpad_1->resize({batch_size, num_filters});
+
+ if (input_data_type == loco::DataType::S8)
+ {
+ // Resize scratchpad_2 for full_integer op
+ scratchpad_2->resize({batch_size, num_units});
+ }
+}
+
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SVDF_H
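For orientation: the SVDF state tensor behaves as a per-filter ring buffer laid out as [batch, num_filters, memory_size]. Each invocation shifts the whole state one step left, writes the newest feature activation into the rightmost column, then reduces over the time dimension with the time weights. The auxiliary buffer sizes the integer path assumes, derived from the loops above (a sketch, not part of the patch):

    // Illustrative sizes for IntegerSVDF's auxiliary buffers.
    const int n_batch = 1, n_input = 8, n_rank = 2, n_memory = 10;
    const int n_filter = 16;              // rows of weight_feature
    const int n_unit = n_filter / n_rank; // output width per batch
    // activation_state: n_batch * n_filter * n_memory  int16_t entries
    // scratchpad:       n_batch * n_filter             int32_t entries
    // output_temp:      n_batch * n_unit               int32_t entries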
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSoftmax.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSoftmax.h
new file mode 100644
index 000000000..9838b542d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSoftmax.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SOFTMAX_H
+#define LUCI_INTERPRETER_PAL_SOFTMAX_H
+
+#include <tensorflow/lite/kernels/internal/reference/softmax.h>
+
+namespace luci_interpreter_pal
+{
+static inline void PopulateSoftmaxLookupTable(tflite::SoftmaxParams *data, float input_scale,
+ float beta)
+{
+ // Do nothing for mcu
+ (void)data;
+ (void)input_scale;
+ (void)beta;
+}
+
+static inline void InitializeParams(tflite::SoftmaxParams *params, float input_scale, float beta)
+{
+ int32 input_beta_multiplier;
+ int input_beta_left_shift;
+ static const int kScaledDiffIntegerBits = 5;
+ tflite::PreprocessSoftmaxScaling(beta, input_scale, kScaledDiffIntegerBits,
+ &input_beta_multiplier, &input_beta_left_shift);
+
+ params->input_multiplier = input_beta_multiplier;
+ params->input_left_shift = input_beta_left_shift;
+ params->diff_min =
+ -tflite::CalculateInputRadius(kScaledDiffIntegerBits, params->input_left_shift);
+}
+
+template <typename T>
+static inline void Softmax(const tflite::SoftmaxParams &params,
+ const tflite::RuntimeShape &input_shape, const T *input_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+  // MARK: Softmax is not currently supported on the mcu platform
+ assert(false && "Softmax NYI");
+ (void)params;
+ (void)input_shape;
+ (void)input_data;
+ (void)output_shape;
+ (void)output_data;
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SOFTMAX_H
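For reference, the float softmax these hooks ultimately target computes y_i = exp(beta * (x_i - max(x))) / sum_j exp(beta * (x_j - max(x))); InitializeParams above precomputes beta * input_scale as the fixed-point multiplier/shift pair used by the quantized path. A naive float sketch of the same semantics (illustrative only, not part of the patch):

    #include <algorithm>
    #include <cmath>

    // Softmax over a single row of n values, with scaling factor beta.
    void softmax_row(const float *x, int n, float beta, float *y)
    {
      const float max_x = *std::max_element(x, x + n);
      float sum = 0.0f;
      for (int i = 0; i < n; ++i)
        sum += (y[i] = std::exp(beta * (x[i] - max_x)));
      for (int i = 0; i < n; ++i)
        y[i] /= sum;
    }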
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToBatchND.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToBatchND.h
new file mode 100644
index 000000000..fdddaa929
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToBatchND.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
+
+#include <tensorflow/lite/kernels/internal/reference/space_to_batch_nd.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void
+SpaceToBatchND(const tflite::SpaceToBatchParams &params,
+ const tflite::RuntimeShape &unextended_input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape, const int32 *block_shape_data,
+ const tflite::RuntimeShape &unextended_input3_shape, const int32 *paddings_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::SpaceToBatchND(
+ params, unextended_input1_shape, input1_data, unextended_input2_shape, block_shape_data,
+ unextended_input3_shape, paddings_data, unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETOBATCHND_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToDepth.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToDepth.h
new file mode 100644
index 000000000..816b7f663
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSpaceToDepth.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+#define LUCI_INTERPRETER_PAL_SPACETODEPTH_H
+
+#include <tensorflow/lite/kernels/internal/reference/space_to_depth.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void SpaceToDepth(const tflite::SpaceToDepthParams &op_params,
+ const tflite::RuntimeShape &unextended_input_shape,
+ const T *input_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data)
+{
+ tflite::reference_ops::SpaceToDepth(op_params, unextended_input_shape, input_data,
+ unextended_output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SPACETODEPTH_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/PALSub.h b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSub.h
new file mode 100644
index 000000000..ea57578c6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/PALSub.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_PAL_SUB_H
+#define LUCI_INTERPRETER_PAL_SUB_H
+
+#include <tensorflow/lite/kernels/internal/reference/sub.h>
+
+namespace luci_interpreter_pal
+{
+template <typename T>
+static inline void Sub(const tflite::ArithmeticParams &params,
+ const tflite::RuntimeShape &input1_shape, const T *input1_data,
+ const tflite::RuntimeShape &input2_shape, const T *input2_data,
+ const tflite::RuntimeShape &output_shape, T *output_data)
+{
+ tflite::reference_ops::Sub(params, input1_shape, input1_data, input2_shape, input2_data,
+ output_shape, output_data);
+}
+} // namespace luci_interpreter_pal
+
+#endif // LUCI_INTERPRETER_PAL_SUB_H
diff --git a/compiler/luci-micro/luci-interpreter/pal/mcu/pal.cmake b/compiler/luci-micro/luci-interpreter/pal/mcu/pal.cmake
new file mode 100644
index 000000000..907d51de6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/pal/mcu/pal.cmake
@@ -0,0 +1,56 @@
+macro(initialize_pal)
+ nnas_find_package(TensorFlowSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowEigenSource EXACT 2.6.0 QUIET)
+ nnas_find_package(TensorFlowRuySource EXACT 2.6.0 QUIET)
+
+ if (NOT TensorFlowSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: TensorFlow not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowGEMMLowpSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: gemmlowp not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowEigenSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Eigen not found")
+ return()
+ endif ()
+
+ if (NOT TensorFlowRuySource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Ruy not found")
+ return()
+ endif ()
+ #find_package(Threads REQUIRED)
+
+ set(PAL_INITIALIZED TRUE)
+endmacro()
+
+macro(add_pal_to_target TGT)
+ target_include_directories(${TGT} PRIVATE "${PAL}")
+ target_include_directories(${TGT} PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}")
+ target_include_directories(${TGT} PRIVATE ${LUCI_INTERPRETER_PAL_DIR})
+
+  # TODO: revisit this approach; for now, the required TensorFlow Lite
+  # reference kernel sources are built directly into this library.
+ set(PAL_SOURCES ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/tensor_utils.cc
+ ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/reference/portable_tensor_utils.cc)
+ add_library(luci_interpreter_mcu_pal STATIC ${PAL_SOURCES})
+ set_target_properties(luci_interpreter_mcu_pal PROPERTIES POSITION_INDEPENDENT_CODE ON)
+ target_include_directories(luci_interpreter_mcu_pal PRIVATE
+ "${TensorFlowRuySource_DIR}"
+ "${TensorFlowGEMMLowpSource_DIR}"
+ "${TensorFlowEigenSource_DIR}"
+ "${TensorFlowSource_DIR}"
+ )
+
+ target_link_libraries(${TGT} PRIVATE luci_interpreter_mcu_pal)
+ #target_link_libraries(${TGT} PRIVATE Threads::Threads luci_interpreter_mcu_pal)
+endmacro()
diff --git a/compiler/luci-micro/luci-interpreter/requires.cmake b/compiler/luci-micro/luci-interpreter/requires.cmake
new file mode 100644
index 000000000..f411f387a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/requires.cmake
@@ -0,0 +1 @@
+require(luci)
diff --git a/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.cpp b/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.cpp
new file mode 100644
index 000000000..6ad1f320c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/BuddyMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+BuddyMemoryManager::BuddyMemoryManager(uint8_t *memory_start, int32_t memSize)
+{
+ int32_t p = lowerLog2(memSize);
+
+ // We assume that the requested size of memory does not exceed 4 GB
+ assert(p < 32);
+ memSize = 1 << p;
+
+ _start_block = reinterpret_cast<Block *>(memory_start);
+ _start_block->size = memSize - sizeof(Block);
+ _start_block->is_free = true;
+ _start_block->self = _start_block;
+ _num_blocks = 0;
+ _size = _start_block->size;
+
+ for (auto &_free_block : _free_blocks)
+ _free_block = nullptr;
+
+ addToBlocks(_start_block, p);
+}
+
+void BuddyMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+ const size_t element_size = getDataTypeSize(tensor.element_type());
+ const int32_t num_elements = tensor.shape().num_elements();
+ auto size = num_elements * element_size;
+ auto footprint = size + sizeof(Block);
+  // Use log2(footprint) if footprint is a power of two; otherwise round up.
+  auto l = (footprint & (footprint - 1)) == 0
+             ? lowerLog2(footprint)
+             : lowerLog2(footprint) + 1;
+
+ while (l < 32 && !_free_blocks[l])
+ l++;
+
+ assert(l < 32);
+
+ Block *tmp;
+ tmp = _free_blocks[l];
+ removeFromBlocks(tmp, l);
+
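+  // Split the block into halves ("buddies") while a half still fits the request.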
+ while ((tmp->size + sizeof(Block)) / 2 >= size + sizeof(Block))
+ {
+ divideBlock(tmp, l);
+ l--;
+ }
+
+ tmp->is_free = false;
+ tmp->self = tmp;
+ _num_blocks++;
+
+ auto *data = (uint8_t *)(tmp + 1);
+ tensor.set_data_buffer(data);
+}
+
+void BuddyMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+ auto data = tensor.data<void>();
+ auto *tmp = (Block *)((uint8_t *)data - sizeof(Block));
+
+ assert(tmp->self == tmp);
+
+ tmp->is_free = true;
+ addToBlocks(tmp, lowerLog2(tmp->size + sizeof(Block)));
+
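+  // Coalesce the freed block with its buddy repeatedly, stopping once the
+  // whole pool is a single free block or no further merge is possible.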
+ while (tmp)
+ if (tmp->size == _size)
+ break;
+ else
+ tmp = mergeBlock(tmp);
+
+ _num_blocks--;
+ tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.test.cpp b/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.test.cpp
new file mode 100644
index 000000000..29fb767b7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/BuddyMemoryManager.test.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/BuddyMemoryManager.h"
+#include <gtest/gtest.h>
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(BuddyMemoryManager, basic)
+{
+ auto mem_pool = std::make_unique<uint8_t[]>(200);
+ auto buddy_memory_manager = std::make_unique<BuddyMemoryManager>(mem_pool.get(), 130);
+ Tensor first_tensor(DataType::U8, Shape({8}), AffineQuantization{}, "first_tensor");
+
+ buddy_memory_manager->allocate_memory(first_tensor);
+
+ uint8_t data_1[] = {1, 2, 3, 4, 5, 6, 7, 8};
+
+ first_tensor.writeData(data_1, 8);
+ uint8_t array_1[8];
+ first_tensor.readData(array_1, 8);
+ for (int i = 0; i < 8; i++)
+ {
+ EXPECT_EQ(data_1[i], array_1[i]);
+ }
+
+ Tensor second_tensor(DataType::U8, Shape({2, 5}), AffineQuantization{}, "second_tensor");
+ buddy_memory_manager->allocate_memory(second_tensor);
+
+ uint8_t data_2[2][5] = {{11, 22, 33, 44, 55}, {12, 23, 34, 45, 56}};
+ second_tensor.writeData(data_2, 10);
+
+ uint8_t array_2[2][5];
+ second_tensor.readData(array_2, 10);
+ for (int i = 0; i < 2; i++)
+ {
+ for (int j = 0; j < 5; j++)
+ {
+ EXPECT_EQ(data_2[i][j], array_2[i][j]);
+ }
+ }
+
+ buddy_memory_manager->release_memory(first_tensor);
+ EXPECT_EQ(first_tensor.data<void>(), nullptr);
+
+ buddy_memory_manager->release_memory(second_tensor);
+ EXPECT_EQ(second_tensor.data<void>(), nullptr);
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/CMakeLists.txt
new file mode 100644
index 000000000..997b75a84
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/CMakeLists.txt
@@ -0,0 +1,61 @@
+include("${LUCI_INTERPRETER_PAL_DIR}/pal.cmake")
+
+initialize_pal()
+
+if (NOT PAL_INITIALIZED)
+ message("PAL Failed to initialize, skip luci-interpreter")
+ return()
+endif()
+
+message(STATUS "LUCI INTERPRETER BEGIN")
+
+set(LUCI_INTERPRETER_BINARY "luci_interpreter${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_CORE "luci_interpreter_core${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_KERNELS "luci_interpreter_kernels${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_LOADER "luci_interpreter_loader${LUCI_INTERPRETER_SUFFIX}")
+set(LUCI_INTERPRETER_IMPORT "luci_interpreter_import${LUCI_INTERPRETER_SUFFIX}")
+
+add_subdirectory(core)
+message(STATUS "LUCI INTERPRETER CORE")
+add_subdirectory(kernels)
+message(STATUS "LUCI INTERPRETER KERNELS")
+add_subdirectory(loader)
+message(STATUS "LUCI INTERPRETER LOADER")
+add_subdirectory(import)
+message(STATUS "LUCI INTERPRETER IMPORT")
+
+message(STATUS "LUCI INTERPTER INITALIZED")
+
+set(SOURCES
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/Interpreter.h"
+ Interpreter.cpp "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/SimpleMemoryManager.h" SimpleMemoryManager.cpp
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/TestMemoryManager.h" TestMemoryManager.cpp
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/BuddyMemoryManager.h" BuddyMemoryManager.cpp
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/StaticMemoryManager.h" StaticMemoryManager.cpp)
+
+if (NOT LUCI_INTERPRETER_STATIC)
+ add_library(${LUCI_INTERPRETER_BINARY} SHARED ${SOURCES})
+else ()
+ add_library(${LUCI_INTERPRETER_BINARY} STATIC ${SOURCES})
+endif ()
+
+set(TEST_SOURCES BuddyMemoryManager.test.cpp)
+
+target_include_directories(${LUCI_INTERPRETER_BINARY} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_include_directories(${LUCI_INTERPRETER_BINARY} PRIVATE "${LUCI_INTERPRETER_SOURCE_DIR}")
+target_link_libraries(${LUCI_INTERPRETER_BINARY}
+ PUBLIC luci_lang ${LUCI_INTERPRETER_LOADER} ${LUCI_INTERPRETER_CORE}
+ PRIVATE nncc_common)
+
+install(TARGETS ${LUCI_INTERPRETER_BINARY} DESTINATION lib)
+install(DIRECTORY include/ DESTINATION include
+ FILES_MATCHING PATTERN "*.h")
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+GTest_AddTest(buddy_manager_test ${TEST_SOURCES})
+target_link_libraries(buddy_manager_test ${LUCI_INTERPRETER_BINARY})
diff --git a/compiler/luci-micro/luci-interpreter/src/Interpreter.cpp b/compiler/luci-micro/luci-interpreter/src/Interpreter.cpp
new file mode 100644
index 000000000..8cf272efd
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/Interpreter.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/Interpreter.h"
+#include "luci_interpreter/SimpleMemoryManager.h"
+
+#include "loader/ModuleLoader.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace
+{
+
+class EventNotifierImpl final : public EventNotifier
+{
+public:
+ EventNotifierImpl(const RuntimeToIR &runtime_to_ir,
+ const std::vector<ExecutionObserver *> &observers)
+ : _runtime_to_ir(runtime_to_ir), _observers(observers)
+ {
+ }
+
+ void postTensorWrite(const Tensor *tensor) override
+ {
+ assert(tensor != nullptr);
+ for (const auto &observer : _observers)
+ {
+ observer->postTensorWrite(_runtime_to_ir.tensor_to_node.at(tensor), tensor);
+ }
+ }
+
+ void preOperatorExecute(const Kernel *kernel) override
+ {
+ assert(kernel != nullptr);
+ for (const auto &observer : _observers)
+ {
+ observer->preOperatorExecute(_runtime_to_ir.kernel_to_node.at(kernel));
+ }
+ }
+
+ void postOperatorExecute(const Kernel *kernel) override
+ {
+ assert(kernel != nullptr);
+ for (const auto &observer : _observers)
+ {
+ observer->postOperatorExecute(_runtime_to_ir.kernel_to_node.at(kernel));
+ }
+ }
+
+private:
+ const RuntimeToIR &_runtime_to_ir;
+ const std::vector<ExecutionObserver *> &_observers;
+};
+
+} // namespace
+
+Interpreter::Interpreter(const luci::Module *module)
+{
+ _runtime_to_ir = std::make_unique<RuntimeToIR>();
+ _event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers);
+ _runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get());
+
+ _default_memory_manager = std::make_unique<SimpleMemoryManager>();
+
+ ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor,
+ _default_memory_manager.get());
+ loader.load();
+}
+
+Interpreter::Interpreter(const luci::Module *module,
+ luci_interpreter::IMemoryManager *memory_manager)
+{
+ assert(memory_manager && "Use Interpreter::Interpreter(module) constructor instead");
+
+ _runtime_to_ir = std::make_unique<RuntimeToIR>();
+ _event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers);
+ _runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get());
+
+ ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor,
+ memory_manager);
+ loader.load();
+}
+
+Interpreter::~Interpreter() = default;
+
+void Interpreter::writeInputTensor(const luci::CircleInput *input_node, const void *data,
+ size_t data_size)
+{
+ Tensor *tensor = _runtime_module->getInputTensors()[input_node->index()];
+ if (tensor == nullptr)
+ {
+ const std::string &name = input_node->name();
+ throw std::runtime_error("Cannot find tensor for input node named \"" + name + "\".");
+ }
+ if (data != nullptr)
+ tensor->writeData(data, data_size);
+}
+
+void Interpreter::readOutputTensor(const luci::CircleOutput *output_node, void *data,
+ size_t data_size)
+{
+ Tensor *tensor = _runtime_module->getOutputTensors()[output_node->index()];
+ if (tensor == nullptr)
+ {
+ const std::string &name = output_node->name();
+ throw std::runtime_error("Cannot find tensor for output node named \"" + name + "\".");
+ }
+ if (data != nullptr)
+ tensor->readData(data, data_size);
+}
+
+void Interpreter::interpret() { _runtime_module->execute(); }
+
+void Interpreter::attachObserver(ExecutionObserver *observer)
+{
+ if (std::find(_observers.cbegin(), _observers.cend(), observer) != _observers.cend())
+ throw std::runtime_error("Observer is already attached.");
+ _observers.push_back(observer);
+}
+
+ExecutionObserver::~ExecutionObserver() = default;
+
+void ExecutionObserver::postTensorWrite(const luci::CircleNode *, const Tensor *) {}
+
+void ExecutionObserver::preOperatorExecute(const luci::CircleNode *) {}
+
+void ExecutionObserver::postOperatorExecute(const luci::CircleNode *) {}
+
+} // namespace luci_interpreter
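Putting the pieces together, the public flow is: construct an Interpreter from a luci::Module (optionally with a custom IMemoryManager), write the inputs, run, read the outputs. A hedged usage sketch — module loading and node lookup are elided, and the names here are illustrative:

    #include <luci_interpreter/Interpreter.h>

    // 'module' is a loaded luci::Module; input_node/output_node are its
    // CircleInput/CircleOutput nodes, obtained elsewhere.
    void run_once(const luci::Module *module, const luci::CircleInput *input_node,
                  const luci::CircleOutput *output_node, const float *in, float *out,
                  size_t num_floats)
    {
      luci_interpreter::Interpreter interpreter(module); // uses SimpleMemoryManager
      interpreter.writeInputTensor(input_node, in, num_floats * sizeof(float));
      interpreter.interpret();
      interpreter.readOutputTensor(output_node, out, num_floats * sizeof(float));
    }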
diff --git a/compiler/luci-micro/luci-interpreter/src/SimpleMemoryManager.cpp b/compiler/luci-micro/luci-interpreter/src/SimpleMemoryManager.cpp
new file mode 100644
index 000000000..230e39896
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/SimpleMemoryManager.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/SimpleMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+void SimpleMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+ if (!tensor.is_allocatable())
+ {
+ return;
+ }
+ if (tensor.is_data_allocated())
+ {
+ release_memory(tensor);
+ }
+ const auto element_size = getDataTypeSize(tensor.element_type());
+ const auto num_elements = tensor.shape().num_elements();
+
+ auto *data = new uint8_t[num_elements * element_size];
+ tensor.set_data_buffer(data);
+}
+
+void SimpleMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+ if (!tensor.is_data_allocated())
+ {
+ tensor.set_data_buffer(nullptr);
+ return;
+ }
+ auto data = tensor.data<uint8_t>();
+ delete[] data;
+ tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/StaticMemoryManager.cpp b/compiler/luci-micro/luci-interpreter/src/StaticMemoryManager.cpp
new file mode 100644
index 000000000..73a819919
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/StaticMemoryManager.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/StaticMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+void StaticMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+ if (!tensor.is_allocatable())
+ {
+ return;
+ }
+ int32_t offset = tensor.get_offset();
+ assert(offset >= 0);
+ auto tensor_ptr = _buffer_ptr + offset;
+ tensor.set_data_buffer(tensor_ptr);
+}
+
+void StaticMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
+ tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/TestMemoryManager.cpp b/compiler/luci-micro/luci-interpreter/src/TestMemoryManager.cpp
new file mode 100644
index 000000000..3beeee55c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/TestMemoryManager.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+
+void TestMemoryManager::allocate_memory(luci_interpreter::Tensor &tensor)
+{
+ if (!tensor.is_allocatable())
+ {
+ return;
+ }
+ if (tensor.is_data_allocated())
+ {
+ release_memory(tensor);
+ }
+ const auto element_size = getDataTypeSize(tensor.element_type());
+ const auto num_elements = tensor.shape().num_elements();
+
+ auto *data = new uint8_t[num_elements * element_size];
+ allocations.push_back(data);
+ tensor.set_data_buffer(data);
+}
+
+void TestMemoryManager::release_memory(luci_interpreter::Tensor &tensor)
+{
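+  // Note: the buffer itself is deliberately not freed here; it stays tracked
+  // in 'allocations' so it can be released in bulk later.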
+ tensor.set_data_buffer(nullptr);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/core/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/core/CMakeLists.txt
new file mode 100644
index 000000000..c2471e01c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/CMakeLists.txt
@@ -0,0 +1,19 @@
+set(SOURCES
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/DataType.h"
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/Tensor.h"
+ EventNotifier.h
+ Kernel.h
+ KernelParams.h
+ RuntimeGraph.h
+ RuntimeGraph.cpp
+ RuntimeModule.h
+ Tensor.cpp)
+
+add_library(${LUCI_INTERPRETER_CORE} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_CORE} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_include_directories(${LUCI_INTERPRETER_CORE} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
+target_link_libraries(${LUCI_INTERPRETER_CORE} PUBLIC luci_lang)
+target_link_libraries(${LUCI_INTERPRETER_CORE} PRIVATE nncc_common)
diff --git a/compiler/luci-micro/luci-interpreter/src/core/EventNotifier.h b/compiler/luci-micro/luci-interpreter/src/core/EventNotifier.h
new file mode 100644
index 000000000..5c4fbd3be
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/EventNotifier.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_EVENTNOTIFIER_H
+#define LUCI_INTERPRETER_CORE_EVENTNOTIFIER_H
+
+namespace luci_interpreter
+{
+
+class Tensor;
+class Kernel;
+
+// Used at execution stage to tell the interpreter that the runtime state has changed in some way.
+class EventNotifier
+{
+public:
+ virtual ~EventNotifier() = default;
+
+ virtual void postTensorWrite(const Tensor *tensor) = 0;
+ virtual void preOperatorExecute(const Kernel *kernel) = 0;
+ virtual void postOperatorExecute(const Kernel *kernel) = 0;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_EVENTNOTIFIER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/core/Kernel.h b/compiler/luci-micro/luci-interpreter/src/core/Kernel.h
new file mode 100644
index 000000000..a7c4a4218
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/Kernel.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_KERNEL_H
+#define LUCI_INTERPRETER_CORE_KERNEL_H
+
+#include "luci_interpreter/core/Tensor.h"
+
+#include <vector>
+
+namespace luci_interpreter
+{
+
+// Base class for all kernels.
+class Kernel
+{
+protected:
+ Kernel(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs)
+ : _inputs(std::move(inputs)), _outputs(std::move(outputs))
+ {
+ }
+
+public:
+ virtual ~Kernel() = default;
+
+ const std::vector<const Tensor *> &getInputTensors() const { return _inputs; }
+ const std::vector<Tensor *> &getOutputTensors() const { return _outputs; }
+
+ // Configures the kernel.
+ // This function is currently called once for each kernel during interpreter construction,
+ // which makes it a convenient place for preparing (resizing) output tensors.
+ virtual void configure() = 0;
+
+ // Executes the kernel.
+ virtual void execute() const = 0;
+
+protected:
+ // NOTE Prefer not to use these in derived classes.
+ const std::vector<const Tensor *> _inputs;
+ const std::vector<Tensor *> _outputs;
+};
+
+// Base class for kernels with parameters.
+template <typename Params> class KernelWithParams : public Kernel
+{
+protected:
+ KernelWithParams(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs,
+ const Params &params)
+ : Kernel(std::move(inputs), std::move(outputs)), _params(params)
+ {
+ }
+
+public:
+ const Params &params() const { return _params; }
+
+protected:
+ const Params _params;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_KERNEL_H
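To make the contract concrete, a minimal hypothetical kernel could look like the sketch below (not part of the patch): configure() prepares and resizes the output, execute() performs the computation.

    #include "core/Kernel.h"
    #include <algorithm>

    namespace luci_interpreter
    {
    // Illustrative identity kernel over a U8 tensor.
    class IdentityKernel : public Kernel
    {
    public:
      IdentityKernel(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}

      void configure() override { getOutputTensors()[0]->resize(getInputTensors()[0]->shape()); }

      void execute() const override
      {
        const auto *in = getInputTensors()[0]->data<uint8_t>();
        auto *out = getOutputTensors()[0]->data<uint8_t>();
        std::copy(in, in + getInputTensors()[0]->shape().num_elements(), out);
      }
    };
    } // namespace luci_interpreter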
diff --git a/compiler/luci-micro/luci-interpreter/src/core/KernelParams.h b/compiler/luci-micro/luci-interpreter/src/core/KernelParams.h
new file mode 100644
index 000000000..6c0220c62
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/KernelParams.h
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_KERNELPARAMS_H
+#define LUCI_INTERPRETER_CORE_KERNELPARAMS_H
+
+#include <luci/IR/AttrPadding.h>
+#include <luci/IR/AttrFusedActFunc.h>
+#include <luci/IR/AttrMirrorPadMode.h>
+#include <luci_interpreter/core/DataType.h>
+
+#include <cstdint>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+// Inject commonly used types into `luci_interpreter` namespace for convenience.
+using Activation = luci::FusedActFunc;
+using Padding = luci::Padding;
+using MirrorPadMode = luci::MirrorPadMode;
+
+struct AddParams
+{
+ Activation activation;
+};
+
+struct ArgMaxParams
+{
+ DataType output_type;
+};
+
+struct BatchMatMulParams
+{
+ bool adj_x;
+ bool adj_y;
+};
+
+struct ConcatenationParams
+{
+ int axis;
+ Activation activation;
+};
+
+struct Conv2DParams
+{
+ Padding padding;
+ int32_t stride_height;
+ int32_t stride_width;
+ int32_t dilation_height_factor;
+ int32_t dilation_width_factor;
+ Activation activation;
+};
+
+struct DepthToSpaceParams
+{
+ int block_size;
+};
+
+struct DepthwiseConv2DParams
+{
+ Padding padding;
+ int32_t depth_multiplier; // TODO Remove, as it can be calculated.
+ int32_t stride_height;
+ int32_t stride_width;
+ int32_t dilation_height_factor;
+ int32_t dilation_width_factor;
+ Activation activation;
+};
+
+struct DivParams
+{
+ Activation activation;
+};
+
+struct FullyConnectedParams
+{
+ Activation activation;
+ bool keep_num_dims = false;
+};
+
+struct GatherParams
+{
+ int32_t axis;
+ int32_t batch_dims;
+};
+
+struct InstanceNormParams
+{
+ float epsilon;
+ Activation activation;
+};
+
+struct L2NormParams
+{
+ Activation activation;
+};
+
+struct LeakyReluParams
+{
+ float alpha;
+};
+
+struct LocalResponseNormalizationParams
+{
+ int32_t radius;
+ float bias;
+ float alpha;
+ float beta;
+};
+
+struct MirrorPadParams
+{
+ MirrorPadMode mode;
+};
+
+struct MulParams
+{
+ Activation activation;
+};
+
+struct OneHotParams
+{
+ int32_t axis;
+};
+
+struct PackParams
+{
+ int32_t values_count;
+ int32_t axis;
+};
+
+struct Pool2DParams
+{
+ Padding padding;
+ int32_t filter_height;
+ int32_t filter_width;
+ int32_t stride_height;
+ int32_t stride_width;
+ Activation activation;
+};
+
+struct ReducerParams
+{
+ bool keep_dims;
+};
+
+struct ResizeBilinearParams
+{
+ bool align_corners;
+ bool half_pixel_centers;
+};
+
+struct ResizeNearestNeighborParams
+{
+ bool align_corners;
+ bool half_pixel_centers;
+};
+
+struct ShapeParams
+{
+ loco::DataType out_type;
+};
+
+struct SubParams
+{
+ Activation activation;
+};
+
+struct SVDFParams
+{
+ bool asymmetric_quantize_inputs;
+ int32_t svdf_rank;
+ Activation activation;
+};
+
+struct SpaceToDepthParams
+{
+ int block_size;
+};
+
+struct SoftmaxParams
+{
+ float beta;
+};
+
+struct StridedSliceParams
+{
+ int32_t begin_mask;
+ int32_t end_mask;
+ int32_t ellipsis_mask;
+ int32_t new_axis_mask;
+ int32_t shrink_axis_mask;
+};
+
+struct SqueezeParams
+{
+ std::vector<int32_t> squeeze_dims;
+};
+
+struct TransposeConvParams
+{
+ Padding padding;
+ int32_t stride_height;
+ int32_t stride_width;
+};
+
+struct UnpackParams
+{
+ int axis;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_KERNELPARAMS_H
diff --git a/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.cpp b/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.cpp
new file mode 100644
index 000000000..c2f8d2ea8
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.cpp
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "core/RuntimeGraph.h"
+
+#include "core/RuntimeModule.h"
+
+#include <algorithm>
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class RuntimeGraph::TensorAllocPlan
+{
+ std::vector<std::vector<Tensor *>> _alloc_plan;
+ std::vector<std::vector<Tensor *>> _dealloc_plan;
+ bool _valid = false;
+ IMemoryManager *_memory_manager;
+
+public:
+ explicit TensorAllocPlan(IMemoryManager *memory_manager);
+ void invalidate() { _valid = false; }
+ bool isValid() const { return _valid; }
+ void build(const RuntimeGraph &graph);
+ void allocate(size_t kernel_index) const;
+ void deallocate(size_t kernel_index) const;
+};
+
+RuntimeGraph::TensorAllocPlan::TensorAllocPlan(IMemoryManager *memory_manager)
+ : _memory_manager(memory_manager)
+{
+}
+
+void RuntimeGraph::TensorAllocPlan::build(const RuntimeGraph &graph)
+{
+ invalidate();
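+  // A tensor's lifetime spans from the kernel that produces it to the last
+  // kernel (or graph output) that reads it.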
+ using Lifetime = std::pair<size_t, size_t>;
+ std::unordered_map<Tensor *, Lifetime> lifetimes;
+ const size_t num_kernels = graph._kernels.size();
+ for (size_t index = 0; index < num_kernels; ++index)
+ {
+ const auto &kernel = graph._kernels[index];
+ for (const Tensor *tensor : kernel->getInputTensors())
+ {
+ auto nc_tensor = const_cast<Tensor *>(tensor);
+ if (lifetimes.count(nc_tensor) > 0)
+ lifetimes.at(nc_tensor).second = index;
+ }
+ for (Tensor *tensor : kernel->getOutputTensors())
+ {
+ assert(lifetimes.count(tensor) == 0);
+ lifetimes[tensor] = Lifetime(index, index);
+ }
+ }
+ for (const Tensor *tensor : graph.getOutputTensors())
+ {
+ auto nc_tensor = const_cast<Tensor *>(tensor);
+ if (lifetimes.count(nc_tensor) > 0)
+ lifetimes.at(nc_tensor).second = num_kernels;
+ }
+ _alloc_plan.assign(num_kernels, std::vector<Tensor *>());
+ _dealloc_plan.assign(num_kernels + 1, std::vector<Tensor *>());
+ for (const auto &item : lifetimes)
+ {
+ _alloc_plan[item.second.first].push_back(item.first);
+ _dealloc_plan[item.second.second].push_back(item.first);
+ }
+ _valid = true;
+}
+
+void RuntimeGraph::TensorAllocPlan::allocate(size_t kernel_index) const
+{
+ assert(_valid && kernel_index < _alloc_plan.size());
+ for (Tensor *tensor : _alloc_plan[kernel_index])
+ {
+ _memory_manager->allocate_memory(*tensor);
+ }
+}
+
+void RuntimeGraph::TensorAllocPlan::deallocate(size_t kernel_index) const
+{
+ assert(_valid && kernel_index < _dealloc_plan.size());
+ for (Tensor *tensor : _dealloc_plan[kernel_index])
+ {
+ _memory_manager->release_memory(*tensor);
+ }
+}
+
+RuntimeGraph::RuntimeGraph(RuntimeModule *owning_module, IMemoryManager *memory_manager)
+  : _memory_manager(memory_manager), _owning_module(owning_module),
+ _tensor_alloc_plan(std::make_unique<TensorAllocPlan>(memory_manager))
+{
+}
+
+RuntimeGraph::~RuntimeGraph()
+{
+ for (auto &tensor : _tensors)
+ {
+ if (tensor->is_data_allocated())
+ _memory_manager->release_memory(*tensor);
+ }
+}
+
+Tensor *RuntimeGraph::addTensor(std::unique_ptr<Tensor> &&tensor)
+{
+ assert(tensor != nullptr);
+ _tensors.push_back(std::move(tensor));
+ return _tensors.back().get();
+}
+
+void RuntimeGraph::setInputTensors(const std::vector<Tensor *> &input_tensors)
+{
+ assert(std::all_of(input_tensors.cbegin(), input_tensors.cend(),
+ [](Tensor *tensor) { return tensor != nullptr; }));
+ _input_tensors = input_tensors;
+}
+
+void RuntimeGraph::setOutputTensors(const std::vector<Tensor *> &output_tensors)
+{
+ assert(std::all_of(output_tensors.cbegin(), output_tensors.cend(),
+ [](Tensor *tensor) { return tensor != nullptr; }));
+ _output_tensors = output_tensors;
+}
+
+void RuntimeGraph::configureAllocations(Tensor *tensor)
+{
+ _memory_manager->allocate_memory(*tensor);
+}
+
+void RuntimeGraph::addKernel(std::unique_ptr<Kernel> &&kernel)
+{
+ assert(kernel != nullptr);
+ _kernels.push_back(std::move(kernel));
+ _tensor_alloc_plan->invalidate();
+}
+
+void RuntimeGraph::execute() const
+{
+ if (!_tensor_alloc_plan->isValid())
+ _tensor_alloc_plan->build(*this);
+
+ EventNotifier *event_notifier = _owning_module->getEventNotifier();
+
+ // Notify the observers that the input tensors have changed.
+ if (event_notifier != nullptr)
+ {
+ for (const Tensor *input_tensor : getInputTensors())
+ {
+ if (input_tensor->is_observable())
+ event_notifier->postTensorWrite(input_tensor);
+ }
+ }
+
+ for (size_t index = 0; index < _kernels.size(); ++index)
+ {
+ const auto &kernel = _kernels[index];
+ if (event_notifier != nullptr)
+ {
+ event_notifier->preOperatorExecute(kernel.get());
+ }
+
+ // TODO The `configure` method should only be called if the outputs of an operator need to be
+ // resized.
+ kernel->configure();
+
+    // Allocate this kernel's output tensors up front instead of relying on
+    // automatic allocation
+ _tensor_alloc_plan->allocate(index);
+
+ kernel->execute();
+
+ if (event_notifier != nullptr)
+ {
+ event_notifier->postOperatorExecute(kernel.get());
+ }
+
+ for (const Tensor *tensor : kernel->getOutputTensors())
+ {
+ if (event_notifier != nullptr && tensor->is_observable())
+ {
+ event_notifier->postTensorWrite(tensor);
+ }
+ }
+ _tensor_alloc_plan->deallocate(index);
+ }
+}
+
+} // namespace luci_interpreter
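The net effect of the plan on a three-kernel chain A -> B -> C, written out as a sketch:

    // kernel index:      0:A            1:B            2:C
    // A's output t0: allocated at 0; last read by B -> released after kernel 1
    // B's output t1: allocated at 1; last read by C -> released after kernel 2
    // C's output t2: allocated at 2; graph output  -> placed in dealloc slot
    //                num_kernels, which execute() never reaches, so it stays
    //                alive for readers of the graph outputs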
diff --git a/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.h b/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.h
new file mode 100644
index 000000000..8184e249d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/RuntimeGraph.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
+#define LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
+
+#include "luci_interpreter/core/Tensor.h"
+#include "luci_interpreter/MemoryManager.h"
+#include "core/Kernel.h"
+
+#include <memory>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+class RuntimeModule;
+
+class RuntimeGraph
+{
+private:
+ class TensorAllocPlan;
+ friend class TensorAllocPlan;
+
+public:
+ explicit RuntimeGraph(RuntimeModule *owning_module, IMemoryManager *memory_manager);
+ ~RuntimeGraph();
+
+ Tensor *addTensor(std::unique_ptr<Tensor> &&tensor);
+
+ void setInputTensors(const std::vector<Tensor *> &input_tensors);
+ void setOutputTensors(const std::vector<Tensor *> &output_tensors);
+
+ void configureAllocations(Tensor *tensor);
+
+ const std::vector<Tensor *> &getInputTensors() const { return _input_tensors; }
+ const std::vector<Tensor *> &getOutputTensors() const { return _output_tensors; }
+
+ void addKernel(std::unique_ptr<Kernel> &&kernel);
+
+ void execute() const;
+
+private:
+ IMemoryManager *_memory_manager;
+ RuntimeModule *_owning_module;
+ std::vector<std::unique_ptr<Tensor>> _tensors;
+ std::vector<Tensor *> _input_tensors;
+ std::vector<Tensor *> _output_tensors;
+
+ // Kernels in execution order.
+ std::vector<std::unique_ptr<Kernel>> _kernels;
+ // Allocation plan tracking which tensors are no longer used after a given op
+ std::unique_ptr<TensorAllocPlan> _tensor_alloc_plan;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
diff --git a/compiler/luci-micro/luci-interpreter/src/core/RuntimeModule.h b/compiler/luci-micro/luci-interpreter/src/core/RuntimeModule.h
new file mode 100644
index 000000000..78873b0ec
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/RuntimeModule.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
+#define LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
+
+#include "core/RuntimeGraph.h"
+#include "core/EventNotifier.h"
+#include "luci_interpreter/MemoryManager.h"
+
+#include <memory>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+class RuntimeModule
+{
+public:
+ explicit RuntimeModule(EventNotifier *event_notifier) : _event_notifier(event_notifier) {}
+
+ EventNotifier *getEventNotifier() const { return _event_notifier; }
+
+ RuntimeGraph *addGraph(IMemoryManager *memory_manager)
+ {
+ _graphs.push_back(std::make_unique<RuntimeGraph>(this, memory_manager));
+ return _graphs.back().get();
+ }
+
+ const std::vector<Tensor *> &getInputTensors() const { return getMainGraph()->getInputTensors(); }
+ const std::vector<Tensor *> &getOutputTensors() const
+ {
+ return getMainGraph()->getOutputTensors();
+ }
+
+ void execute() const { getMainGraph()->execute(); }
+
+private:
+ RuntimeGraph *getMainGraph() const { return _graphs[0].get(); }
+
+ EventNotifier *const _event_notifier;
+ std::vector<std::unique_ptr<RuntimeGraph>> _graphs;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
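RuntimeModule is a thin owner of graphs whose first graph serves as the entry point. A hedged wiring sketch; in this codebase the Interpreter front end performs these steps, and SimpleMemoryManager stands in for any IMemoryManager implementation:

    SimpleMemoryManager memory_manager;
    RuntimeModule module(/*event_notifier=*/nullptr); // observers are optional
    RuntimeGraph *graph = module.addGraph(&memory_manager);
    // ... populate `graph` with tensors and kernels (normally done by a graph loader) ...
    module.execute(); // runs the main (first) graph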
diff --git a/compiler/luci-micro/luci-interpreter/src/core/Tensor.cpp b/compiler/luci-micro/luci-interpreter/src/core/Tensor.cpp
new file mode 100644
index 000000000..3c3c5ffff
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/core/Tensor.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/core/Tensor.h"
+
+#include <cstring>
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+Tensor::Tensor(DataType element_type, Shape shape, AffineQuantization quantization,
+ std::string name)
+ : _element_type(element_type), _shape(std::move(shape)), _quantization(std::move(quantization)),
+ _name(std::move(name)), _data_allocated(false)
+{
+}
+
+void Tensor::readData(void *data_ptr, size_t data_size) const
+{
+ const size_t element_size = getDataTypeSize(element_type());
+ const int32_t num_elements = shape().num_elements();
+ if (data_size != num_elements * element_size)
+ {
+ throw std::invalid_argument("Invalid data size.");
+ }
+ assert(data_ptr != nullptr);
+ std::memcpy(data_ptr, data<void>(), data_size);
+}
+
+void Tensor::writeData(const void *data_ptr, size_t data_size)
+{
+ const size_t element_size = getDataTypeSize(element_type());
+ const int32_t num_elements = shape().num_elements();
+ if (data_size != num_elements * element_size)
+ {
+ throw std::invalid_argument("Invalid data size.");
+ }
+ assert(data_ptr != nullptr);
+ std::memcpy(data<void>(), data_ptr, data_size);
+}
+
+void Tensor::resize(const Shape &new_shape) { _shape = new_shape; }
+
+} // namespace luci_interpreter
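readData and writeData accept only a size equal to num_elements * element_size; anything else throws. A hedged round-trip sketch, assuming a memory manager has already been set up to allocate the tensor's storage:

    Tensor tensor(DataType::FLOAT32, Shape({2, 2}), AffineQuantization{}, "t");
    memory_manager.allocate_memory(tensor); // any IMemoryManager instance

    const float in[4] = {1.f, 2.f, 3.f, 4.f};
    tensor.writeData(in, sizeof(in)); // size must equal 4 * sizeof(float)

    float out[4] = {};
    tensor.readData(out, sizeof(out));
    // Passing e.g. 3 * sizeof(float) would throw std::invalid_argument.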
diff --git a/compiler/luci-micro/luci-interpreter/src/import/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/import/CMakeLists.txt
new file mode 100644
index 000000000..dd9733f92
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/import/CMakeLists.txt
@@ -0,0 +1,15 @@
+set(SOURCES
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/GraphBuilderRegistry.h"
+ GraphBuilderRegistry.cpp)
+
+# include specific builders
+file(GLOB_RECURSE NODES "Nodes/*")
+list(APPEND SOURCES ${NODES})
+
+add_library(${LUCI_INTERPRETER_IMPORT} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_IMPORT} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+
+target_include_directories(${LUCI_INTERPRETER_IMPORT} PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_link_libraries(${LUCI_INTERPRETER_IMPORT} PUBLIC luci_import)
diff --git a/compiler/luci-micro/luci-interpreter/src/import/GraphBuilderRegistry.cpp b/compiler/luci-micro/luci-interpreter/src/import/GraphBuilderRegistry.cpp
new file mode 100644
index 000000000..a33bca6a4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/import/GraphBuilderRegistry.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "luci_interpreter/GraphBuilderRegistry.h"
+#include "Nodes/CircleReferencingConst.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<luci::GraphBuilderSource> source_without_constant_copying()
+{
+ auto builder = std::make_unique<luci::GraphBuilderRegistry>();
+ {
+ // redefine NodeBuilder of BUFFER type
+ builder->add(std::make_unique<CircleReferencingConstNodeBuilder>());
+ }
+
+ return builder;
+}
+
+} // namespace luci_interpreter
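The registry returned here only replaces the builder for BUFFER-typed constants; all other nodes fall back to the default luci builders. A hedged sketch of the intended call site, assuming luci::Importer accepts a GraphBuilderSource pointer:

    // Assumption: the Circle model buffer must stay alive as long as the
    // imported module is used, since constants now reference it instead of
    // copying it.
    auto source = luci_interpreter::source_without_constant_copying();
    luci::Importer importer(source.get());
    // auto module = importer.importModule(circle_model);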
diff --git a/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp b/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp
new file mode 100644
index 000000000..14e90f240
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CircleReferencingConst.h"
+
+#include <vector>
+
+namespace
+{
+
+// Helper struct describing the data stored in the custom_options of a CircleReferencingConst node
+struct ConstDataReference
+{
+ const uint8_t *data = nullptr;
+ uint32_t size = 0;
+};
+
+} // namespace
+
+namespace luci_interpreter
+{
+using namespace luci;
+
+CircleNode *CircleReferencingConstNodeBuilder::build(TensorIndex tensor_index,
+ GraphBuilderContext *context) const
+{
+ assert(tensor_index >= 0);
+
+ const auto graph = context->graph();
+ const auto reader = context->reader();
+ const auto tensors = reader->tensors();
+ auto const const_tensor = tensors[tensor_index];
+ assert(const_tensor != nullptr);
+ if (const_tensor->is_variable())
+ {
+ // Variable tensors are built elsewhere (as CircleVariable), not by this builder
+ return nullptr;
+ }
+
+ auto const buffer = wrap(reader->buffers()[const_tensor->buffer()]->data());
+ auto const const_dims = wrap(const_tensor->shape()); // in NHWC
+ if (const_dims.empty() && buffer.empty())
+ {
+ // No shape and no data: an unknown-shape tensor or empty scalar; skip it
+ return nullptr;
+ }
+
+ // If tensor_index is used as the output of some other operator, this is not a constant
+ auto tensoroutputs = context->tensoroutputs();
+ if (tensoroutputs->find(tensor_index))
+ {
+ // other operator output tensor
+ return nullptr;
+ }
+
+ uint32_t num_elements = 1;
+ for (uint32_t r = 0; r < const_dims.size(); ++r)
+ {
+ num_elements = num_elements * const_dims[r];
+ }
+
+ if (buffer.empty() && num_elements > 0)
+ {
+ // Shaped tensor without buffer data: a regular (non-constant) tensor; skip it
+ return nullptr;
+ }
+
+ // create CircleReferencingConst
+ auto custom_node = graph->nodes()->create<CircleCustom>(0, 1);
+ {
+ custom_node->custom_code("CircleReferencingConst");
+
+ copy_tensor_attributes(const_tensor, custom_node);
+ custom_node->shape_status(luci::ShapeStatus::VALID);
+
+ // The custom options store the buffer's size and a raw pointer to its data
+ {
+ std::vector<uint8_t> custom_options(sizeof(ConstDataReference));
+ {
+ auto &const_data_ref = *reinterpret_cast<ConstDataReference *>(custom_options.data());
+ const_data_ref = {buffer.data(), buffer.size()};
+ }
+ custom_node->custom_options(custom_options);
+ }
+ }
+
+ // The output of the CircleCustom node is exposed through a CircleCustomOut node
+ auto out_node = graph->nodes()->create<CircleCustomOut>();
+ {
+ out_node->index(0);
+ out_node->input(custom_node);
+
+ copy_tensor_attributes(const_tensor, out_node);
+ out_node->shape_status(luci::ShapeStatus::VALID);
+ }
+
+ return out_node;
+}
+
+} // namespace luci_interpreter
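A consumer of this node reverses the cast above by decoding custom_options back into a ConstDataReference. A minimal sketch of that decode step (the actual consumer lives in the graph loader; std::memcpy from <cstring> is used here to sidestep aliasing concerns):

    const std::vector<uint8_t> &options = custom_node->custom_options();
    assert(options.size() == sizeof(ConstDataReference));

    ConstDataReference const_data_ref{};
    std::memcpy(&const_data_ref, options.data(), sizeof(const_data_ref));
    // const_data_ref.data / const_data_ref.size now point at the original model buffer.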
diff --git a/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.h b/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.h
new file mode 100644
index 000000000..ed8f95124
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/import/Nodes/CircleReferencingConst.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
+#define __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
+
+#include <luci/Import/NodeBuilder.h>
+
+#include <luci/IR/Nodes/CircleConst.h>
+
+namespace luci_interpreter
+{
+using namespace luci;
+
+/**
+ * @brief Builder that creates a CircleCustom node holding a pointer to the constant data of a buffer-backed tensor.
+ */
+class CircleReferencingConstNodeBuilder : public TypedNodeBuilder<NodeBuilderType::BUFFER>
+{
+public:
+ CircleNode *build(TensorIndex tensor_index, GraphBuilderContext *ctx) const final;
+};
+
+} // namespace luci_interpreter
+
+#endif // __LUCI_INTERPRETER_IMPORT_OP_CIRCLE_REFERENCING_CONST_H__
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Add.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Add.cpp
new file mode 100644
index 000000000..d7bf3084f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Add.cpp
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Add.h"
+
+#include "kernels/BinaryOpCommon.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/add.h>
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Add::Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddParams &params)
+ : KernelWithParams<AddParams>({input1, input2}, {output}, params)
+{
+}
+
+void Add::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+ if (input1()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 &&
+ input2()->zero_points().size() == 1);
+ LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
+ output()->zero_point() == 0);
+ }
+
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Add::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Add::evalFloat() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<float>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastAdd4DSlow(
+ params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+ getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<float>(input1()),
+ getTensorShape(input2()), getTensorData<float>(input2()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ }
+}
+
+template <typename T> void Add::evalInteger() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<T>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastAdd4DSlow(
+ params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+ getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+}
+
+void Add::evalQuantized() const
+{
+ const auto input1_scale = static_cast<double>(input1()->scale());
+ const auto input2_scale = static_cast<double>(input2()->scale());
+ const auto output_scale = static_cast<double>(output()->scale());
+
+ const int left_shift = 20;
+ const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
+ const double real_input1_multiplier = input1_scale / twice_max_input_scale;
+ const double real_input2_multiplier = input2_scale / twice_max_input_scale;
+ const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
+
+ int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
+ int input1_shift{}, input2_shift{}, output_shift{};
+ quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
+ quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
+ quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::ArithmeticParams params{};
+ params.left_shift = left_shift;
+ // The kernel expects inputs' zero points to be negated.
+ params.input1_offset = -input1()->zero_point(); // Note the '-'.
+ params.input1_multiplier = input1_multiplier;
+ params.input1_shift = input1_shift;
+ params.input2_offset = -input2()->zero_point(); // Note the '-'.
+ params.input2_multiplier = input2_multiplier;
+ params.input2_shift = input2_shift;
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = output_multiplier;
+ params.output_shift = output_shift;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastAdd4DSlow(
+ params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
+ getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
+ getTensorShape(input2()), getTensorData<uint8_t>(input2()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+}
+
+void Add::evalQuantizedS16() const
+{
+ const auto input1_scale = static_cast<double>(input1()->scale());
+ const auto input2_scale = static_cast<double>(input2()->scale());
+ const auto output_scale = static_cast<double>(output()->scale());
+
+ constexpr int left_shift = 12;
+ const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
+ const double real_input1_multiplier = input1_scale / twice_max_input_scale;
+ const double real_input2_multiplier = input2_scale / twice_max_input_scale;
+ const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
+
+ int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
+ int input1_shift{}, input2_shift{}, output_shift{};
+ quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
+ quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
+ quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ auto fn = [input1_multiplier, input1_shift, //
+ input2_multiplier, input2_shift, //
+ output_multiplier, output_shift, //
+ activation_min, activation_max](int16_t input1_val, int16_t input2_val) {
+ const int32_t shifted_input1_val = static_cast<int32_t>(input1_val) << left_shift;
+ const int32_t shifted_input2_val = static_cast<int32_t>(input2_val) << left_shift;
+ const int32_t scaled_input1_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input1_val, input1_multiplier, input1_shift);
+ const int32_t scaled_input2_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ shifted_input2_val, input2_multiplier, input2_shift);
+ const int32_t raw_sum = scaled_input1_val + scaled_input2_val;
+ const int32_t raw_output = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp(
+ raw_sum, output_multiplier, output_shift);
+ const int32_t clamped_output = std::min(activation_max, std::max(activation_min, raw_output));
+ return static_cast<int16_t>(clamped_output);
+ };
+
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()),
+ getTensorShape(input2()), getTensorData<int16_t>(input2()),
+ getTensorShape(output()), getTensorData<int16_t>(output()), fn);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
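Both quantized paths implement the same rescaling identity. Writing s_i and z_i for the scales and zero points, S = 2 max(s_1, s_2), and L for the left shift (20 for U8, 12 for S16), the multiplier setup above computes

    q_o = z_o + \frac{S}{2^{L} s_o} \left( \frac{s_1}{S} \, 2^{L}(q_1 - z_1) + \frac{s_2}{S} \, 2^{L}(q_2 - z_2) \right)
        \approx z_o + \frac{s_1 (q_1 - z_1) + s_2 (q_2 - z_2)}{s_o}

Each fixed-point factor is at most one (the two input multipliers are at most 1/2 by construction), which is why quantizeMultiplierSmallerThanOneExp applies, and the 2^L pre-shift preserves precision before the final downscaling.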
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Add.h b/compiler/luci-micro/luci-interpreter/src/kernels/Add.h
new file mode 100644
index 000000000..91d95b6af
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Add.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_ADD_H
+#define LUCI_INTERPRETER_KERNELS_ADD_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Add : public KernelWithParams<AddParams>
+{
+public:
+ Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddParams &params);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+ void evalQuantizedS16() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_ADD_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Add.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Add.test.cpp
new file mode 100644
index 000000000..b8b1c3089
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Add.test.cpp
@@ -0,0 +1,357 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Add.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class AddTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// For quantized Add, the error shouldn't exceed one quantization step.
+float GetTolerance(float min, float max)
+{
+ float kQuantizedStep = (max - min) / 255.0;
+ return kQuantizedStep;
+}
+
+TEST_F(AddTest, Uint8)
+{
+ std::initializer_list<int32_t> base_shape = {2, 3, 1, 2};
+ std::initializer_list<float> base_data = {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
+ 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
+ std::initializer_list<int32_t> test_shapes[] = {
+ {1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ std::initializer_list<float> test_data = {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ std::initializer_list<int32_t> output_shapes[] = {
+ {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
+ std::vector<std::vector<float>> output_data = {
+ {-0.1f, 2.6f, -0.7f, 2.8f, 0.7f, 3.0f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
+ 1.0f, -0.8f, 0.4f, -0.6f, 1.8f, -0.2f, 1.4f, 3.0f, 0.8f, 3.0f, 2.2f, 3.0f,
+ -1.4f, 0.3f, -2.0f, 0.5f, -0.6f, 0.9f, 0.9f, -1.9f, 0.3f, -1.7f, 1.7f, -1.3f},
+ {-0.1f, 2.6f, 0.5f, 1.0f, 1.8f, -0.2f, 1.4f, 3.0f, -2.0f, 0.5f, 1.7f, -1.3f},
+ {-0.1f, 2.5f, 0.0f, 2.6f, -0.7f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
+ 1.0f, -0.9f, 1.1f, -0.8f, 0.4f, -1.5f, 1.7f, 3.0f, 2.2f, 3.0f, 2.1f, 3.0f,
+ -1.1f, 0.5f, -0.6f, 1.0f, -0.7f, 0.9f, 1.2f, -1.7f, 1.7f, -1.2f, 1.6f, -1.3f},
+ {-0.1f, 2.5f, 1.2f, 0.8f, 0.4f, -1.5f, 1.7f, 3.0f, -0.6f, 1.0f, 1.6f, -1.3f}};
+ float kQuantizedTolerance = GetTolerance(-3.f, 3.f);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-3.f, 3.f);
+ for (int i = 0; i < output_data.size(); i++)
+ {
+ Tensor input1_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>(
+ test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+
+ AddParams params{};
+ params.activation = Activation::NONE;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data[i], kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i]));
+ }
+ // Re-run with exchanged inputs.
+ for (int i = 0; i < output_data.size(); i++)
+ {
+ Tensor input1_tensor = makeInputTensor<DataType::U8>(
+ test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+
+ AddParams params{};
+ params.activation = Activation::NONE;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data[i], kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i]));
+ }
+}
+
+TEST_F(AddTest, Float)
+{
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ std::vector<std::vector<float>> test_outputs = {
+ {0.0f, 2.6f, 0.0f, 2.8f, 0.7f, 3.2f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
+ 1.0f, 0.0f, 0.4f, 0.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.8f, 3.3f, 2.2f, 3.7f,
+ 0.0f, 0.3f, 0.0f, 0.5f, 0.0f, 0.9f, 0.9f, 0.0f, 0.3f, 0.0f, 1.7f, 0.0f},
+ {0.0f, 2.6f, 0.5f, 1.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.0f, 0.5f, 1.7f, 0.0f},
+ {0.0f, 2.5f, 0.0f, 2.6f, 0.0f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
+ 1.0f, 0.0f, 1.1f, 0.0f, 0.4f, 0.0f, 1.7f, 3.3f, 2.2f, 3.8f, 2.1f, 3.7f,
+ 0.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.9f, 1.2f, 0.0f, 1.7f, 0.0f, 1.6f, 0.0f},
+ {0.0f, 2.5f, 1.2f, 0.8f, 0.4f, 0.0f, 1.7f, 3.3f, 0.0f, 1.0f, 1.6f, 0.0f}};
+ std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
+ 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
+ std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
+ << "With shape number " << i;
+ }
+ // Re-run with exchanged inputs.
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
+ << "With shape number " << i;
+ }
+}
+
+template <loco::DataType DType> void CheckInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ std::vector<std::vector<dtype>> test_outputs = {
+ {3, 3, 0, 1, 0, 8, 5, 1, 0, 0, 2, 6, 8, 0, 1, 0, 5, 1,
+ 5, 4, 0, 2, 2, 9, 11, 0, 4, 0, 8, 5, 11, 2, 4, 0, 8, 7},
+ {3, 3, 0, 0, 5, 1, 5, 4, 4, 0, 8, 7},
+ {3, 6, 0, 3, 0, 0, 5, 4, 2, 1, 0, 0, 8, 0, 5, 0, 1, 0,
+ 0, 2, 2, 4, 7, 9, 6, 0, 8, 0, 13, 5, 6, 0, 8, 2, 13, 7},
+ {3, 6, 2, 1, 1, 0, 0, 2, 8, 0, 13, 7}};
+ std::vector<dtype> input1_data{-1, 2, 1, 0, 4, -5, 1, 3, 7, -1, 7, 1};
+ std::vector<dtype> input2_data{4, 1, -3, -1, 1, 6};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+ // Re-run with exchanged inputs.
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+}
+
+TEST_F(AddTest, SInt32)
+{
+ CheckInteger<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(AddTest, SInt64)
+{
+ CheckInteger<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(AddTest, SInt16)
+{
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ std::vector<std::vector<int32_t>> ref_output_shapes{
+ {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
+
+ std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
+ 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
+ std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ std::vector<std::vector<float>> ref_outputs = {
+ {0.0f, 2.6f, 0.0f, 2.8f, 0.7f, 3.2f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
+ 1.0f, 0.0f, 0.4f, 0.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.8f, 3.3f, 2.2f, 3.7f,
+ 0.0f, 0.3f, 0.0f, 0.5f, 0.0f, 0.9f, 0.9f, 0.0f, 0.3f, 0.0f, 1.7f, 0.0f},
+ {0.0f, 2.6f, 0.5f, 1.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.0f, 0.5f, 1.7f, 0.0f},
+ {0.0f, 2.5f, 0.0f, 2.6f, 0.0f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
+ 1.0f, 0.0f, 1.1f, 0.0f, 0.4f, 0.0f, 1.7f, 3.3f, 2.2f, 3.8f, 2.1f, 3.7f,
+ 0.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.9f, 1.2f, 0.0f, 1.7f, 0.0f, 1.6f, 0.0f},
+ {0.0f, 2.5f, 1.2f, 0.8f, 0.4f, 0.0f, 1.7f, 3.3f, 0.0f, 1.0f, 1.6f, 0.0f}};
+
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data,
+ _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0,
+ input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 4.0 / 32767, 0);
+ const float tolerance = output_tensor.scale();
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor),
+ ::testing::ElementsAreArray(ref_output_shapes[i]))
+ << "With shape number " << i;
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance))
+ << "With shape number " << i;
+ }
+ // Re-run with exchanged inputs and different scales.
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0,
+ input2_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data,
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 5.0 / 32767, 0);
+ const float tolerance = output_tensor.scale();
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor),
+ ::testing::ElementsAreArray(ref_output_shapes[i]))
+ << "With shape number " << i;
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance))
+ << "With shape number " << i;
+ }
+}
+
+TEST_F(AddTest, Input_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(AddTest, Invalid_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(AddTest, Invalid_Input_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U64);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST_F(AddTest, Invalid_Quantization_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S16>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S16>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16);
+
+ AddParams params{};
+ params.activation = Activation::NONE;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.cpp
new file mode 100644
index 000000000..6561a1783
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ArgMax.h"
+#include "kernels/Utils.h"
+#include "PALArgMax.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ArgMax::ArgMax(const Tensor *input, const Tensor *axis, Tensor *output, const ArgMaxParams &params)
+ : KernelWithParams<ArgMaxParams>({input, axis}, {output}, params)
+{
+}
+
+void ArgMax::configure()
+{
+ assert(axis()->element_type() == DataType::S32 || axis()->element_type() == DataType::S64);
+ assert(input()->shape().num_dims() >= 1);
+ const Shape &input_shape = input()->shape();
+ const int num_dims = input_shape.num_dims();
+ Shape output_shape(num_dims - 1);
+
+ // If the axis value is negative, wrap it around by adding the input's number of dimensions.
+ // If the wrapped value is still negative, the assert below fires.
+ assert(axis()->shape().num_elements() == 1);
+ int axis_value = getTensorData<int32_t>(axis())[0];
+ if (axis_value < 0)
+ axis_value = axis_value + num_dims;
+ assert(axis_value >= 0);
+
+ int j = 0;
+ for (int i = 0; i < num_dims; i++)
+ {
+ if (i == axis_value)
+ continue;
+ output_shape.dim(j++) = input_shape.dim(i);
+ }
+
+ assert(output()->element_type() == _params.output_type);
+
+ output()->resize(output_shape);
+}
+
+void ArgMax::execute() const
+{
+
+#define TF_LITE_ARG_MAX(data_type, axis_type, output_type) \
+ luci_interpreter_pal::ArgMinMax(getTensorShape(input()), getTensorData<data_type>(input()), \
+ getTensorData<axis_type>(axis()), getTensorShape(output()), \
+ getTensorData<output_type>(output()), std::greater<data_type>())
+ if (axis()->element_type() == DataType::S32)
+ {
+ switch (_params.output_type)
+ {
+ case DataType::S32:
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ TF_LITE_ARG_MAX(float, int32_t, int32_t);
+ break;
+ case DataType::U8:
+ TF_LITE_ARG_MAX(uint8_t, int32_t, int32_t);
+ break;
+ default:
+ throw std::runtime_error("Unsupported input type.");
+ }
+ break;
+ case DataType::S64:
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ TF_LITE_ARG_MAX(float, int32_t, int64_t);
+ break;
+ case DataType::U8:
+ TF_LITE_ARG_MAX(uint8_t, int32_t, int64_t);
+ break;
+ default:
+ throw std::runtime_error("Unsupported input type.");
+ }
+ break;
+ default:
+ throw std::runtime_error("Unsupported output type.");
+ }
+ }
+ else
+ {
+ switch (_params.output_type)
+ {
+ case DataType::S32:
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ TF_LITE_ARG_MAX(float, int64_t, int32_t);
+ break;
+ case DataType::U8:
+ TF_LITE_ARG_MAX(uint8_t, int64_t, int32_t);
+ break;
+ default:
+ throw std::runtime_error("Unsupported input type.");
+ }
+ break;
+ case DataType::S64:
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ TF_LITE_ARG_MAX(float, int64_t, int64_t);
+ break;
+ case DataType::U8:
+ TF_LITE_ARG_MAX(uint8_t, int64_t, int64_t);
+ break;
+ default:
+ throw std::runtime_error("Unsupported input type.");
+ }
+ break;
+ default:
+ throw std::runtime_error("Unsupported output type.");
+ }
+ }
+#undef TF_LITE_ARG_MAX
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
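The PAL ArgMinMax call reduces a single axis with the std::greater comparator. A self-contained sketch of the same reduction, specialized to the innermost axis of a flattened [outer, axis] view (a hypothetical helper, not the PAL implementation):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Argmax over the innermost axis; data holds outer * axis_size elements.
    std::vector<int32_t> argMaxLastAxis(const std::vector<float> &data, std::size_t axis_size)
    {
      std::vector<int32_t> result(data.size() / axis_size);
      for (std::size_t outer = 0; outer < result.size(); ++outer)
      {
        std::size_t best = 0;
        for (std::size_t i = 1; i < axis_size; ++i)
          if (data[outer * axis_size + i] > data[outer * axis_size + best])
            best = i;
        result[outer] = static_cast<int32_t>(best);
      }
      return result;
    }

    // Example: argMaxLastAxis({1, 2, 7, 8,  1, 9, 7, 3}, 4) yields {3, 1},
    // matching the MultiDimensions test below.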
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.h b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.h
new file mode 100644
index 000000000..c851b5891
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_ARGMAX_H
+#define LUCI_INTERPRETER_KERNELS_ARGMAX_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ArgMax : public KernelWithParams<ArgMaxParams>
+{
+public:
+ ArgMax(const Tensor *input, const Tensor *axis, Tensor *output, const ArgMaxParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *axis() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_ARGMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.test.cpp
new file mode 100644
index 000000000..474f4b321
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ArgMax.test.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ArgMax.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T1, typename T2>
+void Check(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> dimension_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<T1> input_data,
+ std::initializer_list<int32_t> dimension_data, std::initializer_list<T2> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T1>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor dimension_tensor =
+ makeInputTensor<DataType::S32>(dimension_shape, dimension_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(getElementType<T2>());
+
+ ArgMaxParams params{};
+ params.output_type = getElementType<T2>();
+ ArgMax kernel(&input_tensor, &dimension_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+template <typename T> class ArgMaxTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(ArgMaxTest, DataTypes);
+
+TYPED_TEST(ArgMaxTest, Simple)
+{
+ Check<TypeParam, int32_t>(/*input_shape=*/{1, 1, 1, 4}, /*dimension_shape=*/{},
+ /*output_shape=*/{1, 1, 1},
+ /*input_data=*/
+ {
+ 1, 9, 7, 3, //
+ },
+ /*dimension_data=*/{3}, /*output_data=*/{1});
+ Check<TypeParam, int64_t>(/*input_shape=*/{1, 1, 1, 4}, /*dimension_shape=*/{},
+ /*output_shape=*/{1, 1, 1},
+ /*input_data=*/
+ {
+ 1, 9, 7, 3, //
+ },
+ /*dimension_data=*/{3}, /*output_data=*/{1});
+}
+
+TYPED_TEST(ArgMaxTest, MultiDimensions)
+{
+ Check<TypeParam, int32_t>(/*input_shape=*/{1, 1, 2, 4}, /*dimension_shape=*/{},
+ /*output_shape=*/{1, 1, 2},
+ /*input_data=*/
+ {
+ 1, 2, 7, 8, //
+ 1, 9, 7, 3, //
+ },
+ /*dimension_data=*/{3}, /*output_data=*/{3, 1});
+ Check<TypeParam, int64_t>(/*input_shape=*/{1, 1, 2, 4}, /*dimension_shape=*/{},
+ /*output_shape=*/{1, 1, 2},
+ /*input_data=*/
+ {
+ 1, 2, 7, 8, //
+ 1, 9, 7, 3, //
+ },
+ /*dimension_data=*/{3}, /*output_data=*/{3, 1});
+}
+
+TEST(ArgMaxTest, UnsupportedType_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4},
+ {
+ 1, 2, 7, 8, //
+ 1, 9, 7, 3, //
+ },
+ memory_manager.get());
+ Tensor dimension_tensor = makeInputTensor<DataType::S32>({}, {3}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ ArgMaxParams params{};
+ params.output_type = DataType::U8;
+ ArgMax kernel(&input_tensor, &dimension_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.cpp
new file mode 100644
index 000000000..d3bade9e4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.cpp
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/AveragePool2D.h"
+
+#include "kernels/Utils.h"
+
+#include "PALAveragePool2d.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, Tensor *scratchpad,
+ const Pool2DParams &params)
+ : KernelWithParams<Pool2DParams>({input}, {output, scratchpad}, params)
+{
+}
+
+void AveragePool2D::configure()
+{
+ if (input()->element_type() != output()->element_type())
+ {
+ throw std::runtime_error("Input Tensor and Output Tensor Type must be same");
+ }
+ if (input()->shape().num_dims() != 4)
+ {
+ throw std::runtime_error("Input Tensor Shape must be 4-D");
+ }
+ const Shape &input_shape = input()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t depth = input_shape.dim(3);
+
+ const int32_t output_height =
+ computeOutputSize(_params.padding, input_height, _params.filter_height, _params.stride_height);
+ const int32_t output_width =
+ computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width);
+
+ _padding_height =
+ computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
+ _padding_width =
+ computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
+ if (input()->element_type() == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point());
+ }
+ else if (input()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+ }
+ else if (input()->element_type() == DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point());
+ }
+ output()->resize({batches, output_height, output_width, depth});
+
+ auto scratchpad = getOutputTensors()[1];
+ luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(),
+ getTensorShape(input()), getTensorShape(output()));
+}
+
+void AveragePool2D::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S16:
+ evalSInt16();
+ break;
+ case DataType::S8:
+ evalSInt8();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void AveragePool2D::evalFloat() const
+{
+ float activation_min{};
+ float activation_max{};
+ calculateActivationRange(_params.activation, &activation_min, &activation_max);
+
+ tflite::PoolParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.filter_height = _params.filter_height;
+ params.filter_width = _params.filter_width;
+ params.float_activation_min = activation_min;
+ params.float_activation_max = activation_max;
+
+ tflite::reference_ops::AveragePool(params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void AveragePool2D::evalQuantized() const
+{
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::PoolParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.filter_height = _params.filter_height;
+ params.filter_width = _params.filter_width;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ tflite::reference_ops::AveragePool(params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+}
+
+void AveragePool2D::evalSInt8() const
+{
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+ tflite::PoolParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.filter_height = _params.filter_height;
+ params.filter_width = _params.filter_width;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ auto scratchpad = getOutputTensors()[1];
+ int8_t *scratchpad_data = nullptr;
+ if (scratchpad->is_allocatable())
+ scratchpad_data = scratchpad->data<int8_t>();
+
+ luci_interpreter_pal::AveragePool<int8_t>(
+ params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(output()),
+ getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
+}
+
+void AveragePool2D::evalSInt16() const
+{
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::PoolParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.filter_height = _params.filter_height;
+ params.filter_width = _params.filter_width;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ tflite::reference_integer_ops::AveragePool(
+ params, getTensorShape(input()), getTensorData<int16_t>(input()), //
+ getTensorShape(output()), getTensorData<int16_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
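configure() above derives the output extent and the implicit padding from the padding mode. A hedged restatement of the two helpers it calls (standard TFLite-style formulas with dilation fixed at 1; the authoritative versions live in kernels/Utils.h):

    #include <algorithm>
    #include <cstdint>

    // SAME pads so that out = ceil(in / stride); VALID uses only full windows.
    int32_t computeOutputSizeSketch(bool same_padding, int32_t in, int32_t filter, int32_t stride)
    {
      return same_padding ? (in + stride - 1) / stride : (in - filter + stride) / stride;
    }

    // Half of the total padding needed to make `out` windows fit the input.
    int32_t computePaddingSketch(int32_t stride, int32_t in, int32_t filter, int32_t out)
    {
      const int32_t total = (out - 1) * stride + filter - in;
      return std::max(total, 0) / 2;
    }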
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.h
new file mode 100644
index 000000000..2c8fe16e7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_AVERAGEPOOL2D_H
+#define LUCI_INTERPRETER_KERNELS_AVERAGEPOOL2D_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class AveragePool2D : public KernelWithParams<Pool2DParams>
+{
+public:
+ AveragePool2D(const Tensor *input, Tensor *output, Tensor *scratchpad,
+ const Pool2DParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void evalSInt16() const;
+ void evalSInt8() const;
+
+private:
+ int32_t _padding_height{};
+ int32_t _padding_width{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_AVERAGEPOOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.test.cpp
new file mode 100644
index 000000000..478bfa68e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/AveragePool2D.test.cpp
@@ -0,0 +1,283 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/AveragePool2D.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class AveragePool2DTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(AveragePool2DTest, Float)
+{
+ Shape input_shape{1, 3, 5, 1};
+ std::vector<float> input_data{
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 0, 1.5, //
+ 4.5, 6, //
+ };
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 1}));
+}
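
As a sanity check on the expected values above: with VALID padding, out_h = (3 - 2) / 1 + 1 = 2 and out_w = (5 - 3) / 2 + 1 = 2, and each output is the mean of a 2x3 window clamped by RELU6:

    window (0,0): (-4 - 3 - 2 + 1 + 2 + 3) / 6 = -0.5 -> clamped to 0
    window (0,2): (-2 - 1 + 0 + 3 + 4 + 5) / 6 =  1.5
    window (1,0): ( 1 + 2 + 3 + 6 + 7 + 8) / 6 =  4.5
    window (1,2): ( 3 + 4 + 5 + 8 + 9 + 10) / 6 = 6.5 -> clamped to 6
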
+
+TEST_F(AveragePool2DTest, Uint8_0)
+{
+ std::vector<float> input_data{
+ 0, -6, 12, 4, //
+ -3, -2, 10, 7, //
+ };
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0.0, 6.0}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1}));
+}
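
The quantization range here is chosen so the scale is exact: (15.9375 - (-15.9375)) / 255 = 0.125. The two 2x2 windows average to (0 - 6 - 3 - 2) / 4 = -2.75 and (12 + 4 + 10 + 7) / 4 = 8.25, which RELU6 clamps to the reference values 0.0 and 6.0 checked above.
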
+
+TEST_F(AveragePool2DTest, Uint8_1)
+{
+ std::vector<float> input_data{
+ 0, 6, 12, 4, //
+ 3, 2, 10, 7, //
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({2.75, 6.0}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1}));
+}
+
+TEST_F(AveragePool2DTest, SInt16)
+{
+ Shape input_shape{1, 3, 5, 1};
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+ std::vector<float> input_data{
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
+ };
+ std::vector<float> ref_output_data{
+ 0, 1.5, //
+ 4.5, 6, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+ Tensor scratchpad(DataType::S16, Shape({}), {}, "");
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(AveragePool2DTest, SInt8)
+{
+ Shape input_shape{1, 4, 5, 1};
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+ std::vector<float> input_data{-7, -3, 0, 2, -5, 12, -15, 3, 10, 5,
+ 7, -6, -1, 9, -2, 0, -5, 11, -1, -7};
+ std::vector<float> ref_output_data{
+ 0, 2.5, //
+ 1, 1.5, //
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<int8_t>(-15.9375f, 15.9375f);
+ Tensor input_tensor = makeInputTensor<DataType::S8>(
+ input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, quant_param.first, quant_param.second);
+ Tensor scratchpad(DataType::S8, Shape({}), {}, "");
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(AveragePool2DTest, Invalid_Input_Shape_NEG)
+{
+ Shape input_shape{1, 3, 5};
+ std::vector<float> input_data{
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(AveragePool2DTest, In_Out_Type_NEG)
+{
+ Shape input_shape{1, 3, 5, 1};
+ std::vector<float> input_data{
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(AveragePool2DTest, Quant_Param_NEG)
+{
+ std::vector<float> input_data{
+ 0, -6, 12, 4, //
+ -3, -2, 10, 7, //
+ };
+
+ std::pair<float, int32_t> quant_param1 = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
+ std::pair<float, int32_t> quant_param2 = quantizationParams<uint8_t>(-7.875f, 7.875f);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param1.first, quant_param1.second, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param2.first, quant_param2.second);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, &scratchpad, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.cpp
new file mode 100644
index 000000000..24ca22996
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.cpp
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchMatMul.h"
+#include "kernels/Utils.h"
+
+#include "PALBatchMatMul.h"
+
+#include <tensorflow/lite/kernels/internal/reference/transpose.h>
+
+#include <stdexcept>
+
+namespace
+{
+
+tflite::RuntimeShape SwapRowColumnDims(const tflite::RuntimeShape &shape)
+{
+ tflite::RuntimeShape swapped_shape(shape);
+ const int32_t dims = shape.DimensionsCount();
+ swapped_shape.SetDim(dims - 2, shape.Dims(dims - 1));
+ swapped_shape.SetDim(dims - 1, shape.Dims(dims - 2));
+ return swapped_shape;
+}
+
+} // namespace
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+BatchMatMul::BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp,
+ Tensor *y_tmp, const BatchMatMulParams &params)
+ : KernelWithParams({x, y}, {output, x_tmp, y_tmp}, params)
+{
+}
+
+void BatchMatMul::configure()
+{
+ auto lhs = x();
+ auto rhs = y();
+ auto adj_x = params().adj_x;
+ auto adj_y = params().adj_y;
+
+ // TODO Support non-float types
+ if (lhs->element_type() != DataType::FLOAT32 || rhs->element_type() != DataType::FLOAT32)
+ throw std::runtime_error("Unsupported type.");
+
+ LUCI_INTERPRETER_CHECK(lhs->element_type() == rhs->element_type());
+
+ auto lhs_rank = lhs->shape().num_dims();
+ auto rhs_rank = rhs->shape().num_dims();
+ LUCI_INTERPRETER_CHECK(lhs_rank >= 2 && lhs_rank <= 4);
+ LUCI_INTERPRETER_CHECK(rhs_rank >= 2 && rhs_rank <= 4);
+
+ auto lhs_scratchpad = temp_lhs();
+ auto rhs_scratchpad = temp_rhs();
+ luci_interpreter_pal::SetupScratchpadTensor(lhs_scratchpad, rhs_scratchpad, getTensorShape(lhs),
+ getTensorShape(rhs));
+
+ auto output_rank = std::max(lhs_rank, rhs_rank);
+
+ auto extended_lhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(lhs));
+ auto extended_rhs_shape = tflite::RuntimeShape::ExtendedShape(output_rank, getTensorShape(rhs));
+
+ // Ensure any batch dimensions obey broadcasting rules.
+ for (int i = 0; i < output_rank - 2; ++i)
+ {
+ const int lhs_dim = extended_lhs_shape.Dims(i);
+ const int rhs_dim = extended_rhs_shape.Dims(i);
+ if (lhs_dim != rhs_dim)
+ {
+ if (lhs_dim != 1)
+ {
+ LUCI_INTERPRETER_CHECK(rhs_dim == 1);
+ }
+ }
+ }
+
+ // Ensure other dimensions work for matrix multiplication.
+ int accum_dim_lhs =
+ adj_x ? extended_lhs_shape.Dims(output_rank - 2) : extended_lhs_shape.Dims(output_rank - 1);
+ int accum_dim_rhs =
+ adj_y ? extended_rhs_shape.Dims(output_rank - 1) : extended_rhs_shape.Dims(output_rank - 2);
+ LUCI_INTERPRETER_CHECK(accum_dim_lhs == accum_dim_rhs);
+
+ Shape output_shape(output_rank);
+ // Fill in any broadcast dimensions.
+ for (int i = 0; i < output_rank - 2; ++i)
+ {
+ const int lhs_dim = extended_lhs_shape.Dims(i);
+ const int rhs_dim = extended_rhs_shape.Dims(i);
+ int broadcast_dim = lhs_dim;
+ if ((lhs_dim != rhs_dim) && (lhs_dim == 1))
+ {
+ broadcast_dim = rhs_dim;
+ }
+ output_shape.dim(i) = broadcast_dim;
+ }
+ // Fill in the matmul dimensions.
+ int lhs_rows_index = adj_x ? output_rank - 1 : output_rank - 2;
+ int rhs_cols_index = adj_y ? output_rank - 2 : output_rank - 1;
+
+ output_shape.dim(output_rank - 2) = extended_lhs_shape.Dims(lhs_rows_index);
+ output_shape.dim(output_rank - 1) = extended_rhs_shape.Dims(rhs_cols_index);
+
+ output()->resize(output_shape);
+}
+
+void TransposeRowsColumns(const Tensor *tensor_in, Tensor *tensor_out)
+{
+ tflite::RuntimeShape transposed_shape(getTensorShape(tensor_in));
+ tflite::RuntimeShape shape(getTensorShape(tensor_in));
+ tflite::TransposeParams params;
+ int rank = shape.DimensionsCount();
+ params.perm_count = rank;
+ for (int i = 0; i < rank - 2; ++i)
+ {
+ params.perm[i] = i;
+ }
+ // Transpose the last two dimensions.
+ params.perm[rank - 2] = rank - 1;
+ params.perm[rank - 1] = rank - 2;
+ transposed_shape.SetDim(rank - 1, shape.Dims(rank - 2));
+ transposed_shape.SetDim(rank - 2, shape.Dims(rank - 1));
+ switch (tensor_in->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::reference_ops::Transpose(params, shape, getTensorData<float>(tensor_in),
+ transposed_shape, getTensorData<float>(tensor_out));
+ break;
+ default:
+ throw std::runtime_error("Only suppport fp32 BatchMatMul for now.");
+ }
+}
+
+void BatchMatMul::execute() const
+{
+ auto lhs = x();
+ auto rhs = y();
+
+ bool adj_x = params().adj_x;
+ bool adj_y = params().adj_y;
+
+ auto orig_lhs_shape = getTensorShape(lhs);
+ auto orig_rhs_shape = getTensorShape(rhs);
+
+ auto rhs_tensor = adj_y ? rhs : temp_rhs();
+ auto lhs_tensor = adj_x ? temp_lhs() : lhs;
+ if (not adj_y)
+ {
+ TransposeRowsColumns(rhs, temp_rhs());
+ }
+ if (adj_x)
+ {
+ TransposeRowsColumns(lhs, temp_lhs());
+ }
+ tflite::RuntimeShape rhs_shape = adj_y ? orig_rhs_shape : SwapRowColumnDims(orig_rhs_shape);
+ tflite::RuntimeShape lhs_shape = adj_x ? orig_lhs_shape : SwapRowColumnDims(orig_lhs_shape);
+
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ luci_interpreter_pal::BatchMatMul(rhs_shape, getTensorData<float>(rhs_tensor), lhs_shape,
+ getTensorData<float>(lhs_tensor), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
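
For the common rank-3, non-adjoint case, the broadcasting and contraction checks in configure() above reduce to the following standalone sketch (illustrative only; the real code works on tflite::RuntimeShape and handles ranks 2 to 4):

    #include <algorithm>
    #include <array>
    #include <cassert>

    // Output shape of lhs[b, m, k] x rhs[b', k, n] with adj_x == adj_y == false.
    std::array<int, 3> batchMatMulShape(std::array<int, 3> lhs, std::array<int, 3> rhs)
    {
      assert(lhs[0] == rhs[0] || lhs[0] == 1 || rhs[0] == 1); // batch dims broadcast
      assert(lhs[2] == rhs[1]);                               // contraction dims match
      return {std::max(lhs[0], rhs[0]), lhs[1], rhs[2]};
    }
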
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.h b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.h
new file mode 100644
index 000000000..744f49795
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H
+#define LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class BatchMatMul : public KernelWithParams<BatchMatMulParams>
+{
+public:
+ BatchMatMul(const Tensor *x, const Tensor *y, Tensor *output, Tensor *x_tmp, Tensor *y_tmp,
+ const BatchMatMulParams &params);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ Tensor *temp_lhs() const { return _outputs[1]; }
+ Tensor *temp_rhs() const { return _outputs[2]; }
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_BATCHMATMUL_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.test.cpp
new file mode 100644
index 000000000..edfa3a685
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchMatMul.test.cpp
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchMatMul.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class BatchMatMulTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(BatchMatMulTest, Float)
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 3}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
+}
+
+TEST_F(BatchMatMulTest, Float_SimpleRHSAdjoint)
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6};
+ std::vector<float> rhs_data = {7, 11, 15, 8, 12, 16, 9, 13, 17, 10, 14, 18};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 3}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 4, 3}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = true;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
+}
+
+TEST_F(BatchMatMulTest, Float_SimpleLHSAdjoint)
+{
+ std::vector<float> lhs_data = {1, 4, 2, 5, 3, 6};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 2}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = true;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4}));
+}
+
+TEST_F(BatchMatMulTest, Float_BatchSizeTwo)
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 2, 3}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({74., 80., 86., 92., 173., 188., 203., 218., 560., 584., 608., 632.,
+ 767., 800., 833., 866.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 4}));
+}
+
+TEST_F(BatchMatMulTest, Float_DiffBatch)
+{
+ std::vector<float> lhs_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ std::vector<float> rhs_data = {7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 1, 6}, lhs_data, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 6, 4}, rhs_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(lhs_scratch);
+ _memory_manager->allocate_memory(rhs_scratch);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({427., 448., 469., 490., 1039., 1096., 1153., 1210.}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 1, 4}));
+}
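
This case exercises batch broadcasting: {2, 1, 6} x {1, 6, 4} broadcasts the batch dimensions (2 vs 1) to 2, giving output shape {2, 1, 4}. The first expected value follows directly: [1..6] dotted with the first rhs column [7, 11, 15, 19, 23, 27] is 7 + 22 + 45 + 76 + 115 + 162 = 427.
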
+
+TEST_F(BatchMatMulTest, Invalid_Shape_NEG)
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 2}, {1, 2, 3, 4}, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 2}, {5, 6, 7, 8, 9, 10}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(BatchMatMulTest, Invalid_Batch_NEG)
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 1, 3}, {1, 2, 3, 4, 5, 6}, _memory_manager.get());
+ Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({3, 3, 1}, {5, 6, 7, 8, 9, 10, 11, 12, 13},
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(BatchMatMulTest, Invalid_Rank_NEG)
+{
+ Tensor lhs_tensor = makeInputTensor<DataType::FLOAT32>({4}, {1, 2, 3, 4}, _memory_manager.get());
+ Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({1, 4, 2}, {5, 6, 7, 8, 9, 10, 11, 12},
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(BatchMatMulTest, Invalid_Rank2_NEG)
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 1, 4}, {1, 2, 3, 4}, _memory_manager.get());
+ Tensor rhs_tensor = makeInputTensor<DataType::FLOAT32>({1, 4, 2}, {5, 6, 7, 8, 9, 10, 11, 12},
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(BatchMatMulTest, TypeMisMatch_NEG)
+{
+ Tensor lhs_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 3}, {1, 2, 3, 4, 5, 6}, _memory_manager.get());
+ Tensor rhs_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 3, 2}, {5, 6, 7, 8, 9, 10}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor lhs_scratch(DataType::U8, Shape({}), {}, "");
+ Tensor rhs_scratch(DataType::FLOAT32, Shape({}), {}, "");
+
+ BatchMatMulParams params;
+ params.adj_x = false;
+ params.adj_y = false;
+
+ BatchMatMul kernel(&lhs_tensor, &rhs_tensor, &output_tensor, &lhs_scratch, &rhs_scratch, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp
new file mode 100644
index 000000000..bd315ff7b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchToSpaceND.h"
+#include "kernels/Utils.h"
+
+#include "PALBatchToSpaceND.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+namespace
+{
+const int kInputMinDimensionNum = 3;
+const int kInputMaxDimensionNum = 4;
+} // namespace
+
+BatchToSpaceND::BatchToSpaceND(const Tensor *input, const Tensor *block_shape, const Tensor *crops,
+ Tensor *output)
+ : Kernel({input, block_shape, crops}, {output})
+{
+}
+
+void BatchToSpaceND::configure()
+{
+ const auto *block_shape_data = block_shape()->data<int32_t>();
+ const auto *crops_data = crops()->data<int32_t>();
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= kInputMinDimensionNum);
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= kInputMaxDimensionNum);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+ int spatial_dims_num = input()->shape().num_dims() - 2;
+
+ LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num);
+
+ LUCI_INTERPRETER_CHECK(crops()->shape().num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(crops()->shape().dim(0) == spatial_dims_num);
+ LUCI_INTERPRETER_CHECK(crops()->shape().dim(1) == 2);
+ for (int i = 0; i < spatial_dims_num * 2; ++i)
+ {
+ LUCI_INTERPRETER_CHECK(crops_data[i] >= 0);
+ }
+
+ Shape output_shape = Shape(input()->shape().num_dims());
+ int output_batch_size = input()->shape().dim(0);
+ for (int i = 0; i < spatial_dims_num; ++i)
+ {
+ LUCI_INTERPRETER_CHECK(output_batch_size % block_shape_data[i] == 0);
+ output_batch_size = output_batch_size / block_shape_data[i];
+ output_shape.dim(i + 1) =
+ input()->shape().dim(i + 1) * block_shape_data[i] - crops_data[i * 2] - crops_data[i * 2 + 1];
+ }
+
+ output_shape.dim(0) = output_batch_size;
+ output_shape.dim(input()->shape().num_dims() - 1) =
+ input()->shape().dim(input()->shape().num_dims() - 1);
+ output()->resize(output_shape);
+}
+
+void BatchToSpaceND::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ luci_interpreter_pal::BatchToSpaceND(
+ getTensorShape(input()), getTensorData<float>(input()), getTensorShape(block_shape()),
+ getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
+ getTensorData<int32_t>(crops()), getTensorShape(output()), getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ luci_interpreter_pal::BatchToSpaceND(
+ getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(block_shape()),
+ getTensorData<int32_t>(block_shape()), getTensorShape(crops()),
+ getTensorData<int32_t>(crops()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
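
Tracing configure() on the shapes used by the tests below: input {4, 2, 2, 1} with block_shape {2, 2} and zero crops gives output_batch_size = 4 / (2 * 2) = 1 and each spatial dimension 2 * 2 - 0 - 0 = 4, i.e. output shape {1, 4, 4, 1}.
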
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.h b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.h
new file mode 100644
index 000000000..57703ea5d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H
+#define LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class BatchToSpaceND : public Kernel
+{
+public:
+ BatchToSpaceND(const Tensor *input, const Tensor *block_shape, const Tensor *crops,
+ Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *block_shape() const { return _inputs[1]; }
+ const Tensor *crops() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_BATCHTOSPACEND_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp
new file mode 100644
index 000000000..52647a763
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BatchToSpaceND.test.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/BatchToSpaceND.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> block_shape_shape,
+ std::initializer_list<int32_t> crops_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<T> input_data, std::initializer_list<int32_t> block_shape_data,
+ std::initializer_list<int32_t> crops_data, std::initializer_list<T> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor block_shape_tensor =
+ makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get());
+ Tensor crops_tensor =
+ makeInputTensor<DataType::S32>(crops_shape, crops_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+template <typename T> class BatchToSpaceNDTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(BatchToSpaceNDTest, DataTypes);
+
+TYPED_TEST(BatchToSpaceNDTest, Simple)
+{
+ Check<TypeParam>(/*input_shape=*/{4, 2, 2, 1}, /*block_shape_shape=*/{2}, /*crops_shape=*/{2, 2},
+ /*output_shape=*/{1, 4, 4, 1},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ /*block_shape_data=*/{2, 2}, /*crops_data=*/{0, 0, 0, 0},
+ /*output_data=*/{1, 5, 2, 6, 9, 13, 10, 14, 3, 7, 4, 8, 11, 15, 12, 16});
+}
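
The interleaved expected data follows from the batch-to-space index mapping for block shape {2, 2}, output[i * 2 + dh][j * 2 + dw] = input[dh * 2 + dw][i][j]: output row 0 picks element (0, 0) then (0, 1) of batches 0 and 1, hence 1, 5, 2, 6, and row 1 does the same for batches 2 and 3, hence 9, 13, 10, 14.
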
+
+TEST(BatchToSpaceNDTest, Invalid_Shape_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {3, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, memory_manager.get());
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get());
+ Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(BatchToSpaceNDTest, Invalid_Crops_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {4, 2, 2, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, memory_manager.get());
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get());
+ Tensor crops_tensor = makeInputTensor<DataType::S32>({2, 2}, {0, 0, -1, 0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ BatchToSpaceND kernel(&input_tensor, &block_shape_tensor, &crops_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/BinaryOpCommon.h b/compiler/luci-micro/luci-interpreter/src/kernels/BinaryOpCommon.h
new file mode 100644
index 000000000..2d2842a9e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/BinaryOpCommon.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_BINARYOPUTILS_H
+#define LUCI_INTERPRETER_KERNELS_BINARYOPUTILS_H
+
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Derived from tensorflow/lite/kernels/internal/reference/maximum_minimum.h (v2.3.0).
+template <typename T, typename Op, int N = 5>
+void BinaryOpBroadcastSlow(const tflite::RuntimeShape &unextended_input1_shape,
+ const T *input1_data,
+ const tflite::RuntimeShape &unextended_input2_shape,
+ const T *input2_data,
+ const tflite::RuntimeShape &unextended_output_shape, T *output_data,
+ Op op)
+{
+ if (unextended_input1_shape == unextended_input2_shape)
+ {
+ const int flat_size = tflite::MatchingElementsSize(
+ unextended_input1_shape, unextended_input2_shape, unextended_output_shape);
+ for (int i = 0; i < flat_size; ++i)
+ {
+ output_data[i] = op(input1_data[i], input2_data[i]);
+ }
+ }
+ else
+ {
+ assert(unextended_input1_shape.DimensionsCount() <= N);
+ assert(unextended_input2_shape.DimensionsCount() <= N);
+ assert(unextended_output_shape.DimensionsCount() <= N);
+
+ tflite::NdArrayDesc<N> desc1{};
+ tflite::NdArrayDesc<N> desc2{};
+ tflite::NdArrayDesc<N> output_desc{};
+ tflite::NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape,
+ &desc1, &desc2);
+ tflite::CopyDimsToDesc(tflite::RuntimeShape::ExtendedShape(N, unextended_output_shape),
+ &output_desc);
+
+ auto fn = [&](int indexes[N]) {
+ output_data[SubscriptToIndex(output_desc, indexes)] =
+ op(input1_data[SubscriptToIndex(desc1, indexes)],
+ input2_data[SubscriptToIndex(desc2, indexes)]);
+ };
+ tflite::NDOpsHelper<N>(output_desc, fn);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_BINARYOPUTILS_H
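
A hedged usage sketch of the helper above, broadcasting an elementwise maximum over a {2, 1} x {1, 3} pair (shapes and data are illustrative; std::max needs <algorithm>):

    tflite::RuntimeShape x_shape({2, 1}), y_shape({1, 3}), out_shape({2, 3});
    float x[] = {1.f, 4.f};
    float y[] = {2.f, 3.f, 5.f};
    float out[6];
    luci_interpreter::kernels::BinaryOpBroadcastSlow(
      x_shape, x, y_shape, y, out_shape, out,
      [](float a, float b) { return std::max(a, b); });
    // out == {2, 3, 5, 4, 4, 5}
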
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/kernels/CMakeLists.txt
new file mode 100644
index 000000000..9f4ba0e0b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/CMakeLists.txt
@@ -0,0 +1,43 @@
+set(SOURCES
+ BinaryOpCommon.h
+ Utils.h
+ Utils.cpp
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/TestMemoryManager.h"
+ ${LUCI_INTERPRETER_SOURCE_DIR}/TestMemoryManager.cpp
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/SimpleMemoryManager.h"
+ ${LUCI_INTERPRETER_SOURCE_DIR}/SimpleMemoryManager.cpp)
+
+macro(REGISTER_KERNEL NODE)
+ list(APPEND SOURCES "${NODE}.h")
+ list(APPEND SOURCES "${NODE}.cpp")
+endmacro(REGISTER_KERNEL)
+
+include(${KERNEL_REGISTER_FILE})
+
+add_library(${LUCI_INTERPRETER_KERNELS} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_KERNELS} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+target_include_directories(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR})
+
+target_link_libraries(${LUCI_INTERPRETER_KERNELS} PUBLIC ${LUCI_INTERPRETER_CORE})
+target_link_libraries(${LUCI_INTERPRETER_KERNELS} PRIVATE nncc_common)
+
+add_pal_to_target(${LUCI_INTERPRETER_KERNELS})
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+macro(REGISTER_KERNEL NODE)
+ list(APPEND TEST_SOURCES "${NODE}.test.cpp")
+endmacro(REGISTER_KERNEL)
+
+include(${KERNEL_REGISTER_FILE})
+
+list(APPEND TEST_SOURCES TestUtils.h TestUtils.cpp)
+
+GTest_AddTest(${LUCI_INTERPRETER_KERNELS}_test ${TEST_SOURCES})
+target_link_libraries(${LUCI_INTERPRETER_KERNELS}_test ${LUCI_INTERPRETER_KERNELS})
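
Note the trick in this file: REGISTER_KERNEL is defined twice and the file named by KERNEL_REGISTER_FILE is included twice, so a single list of lines of the form REGISTER_KERNEL(AveragePool2D) first expands to each kernel's .h/.cpp sources and then, for the test target, to its .test.cpp sources.
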
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Cast.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.cpp
new file mode 100644
index 000000000..39ee725dc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.cpp
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Cast.h"
+#include "kernels/Utils.h"
+
+namespace
+{
+
+using namespace luci_interpreter;
+using namespace luci_interpreter::kernels;
+
+template <typename InT, typename OutT>
+void cast_data(const InT *in_data, OutT *out_data, uint32_t elements_count)
+{
+ std::transform(in_data, in_data + elements_count, out_data,
+ [](InT a) { return static_cast<OutT>(a); });
+}
+
+template <typename InT> void cast_from_pointer_to_tensor(const InT *in_data, Tensor *out_tensor)
+{
+ auto const out_type = out_tensor->element_type();
+ auto const elements_count = out_tensor->shape().num_elements();
+
+ switch (out_type)
+ {
+ case loco::DataType::U8:
+ cast_data(in_data, getTensorData<uint8_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::U16:
+ cast_data(in_data, getTensorData<uint16_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::U32:
+ cast_data(in_data, getTensorData<uint32_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::U64:
+ cast_data(in_data, getTensorData<uint64_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::S8:
+ cast_data(in_data, getTensorData<int8_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::S16:
+ cast_data(in_data, getTensorData<int16_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::S32:
+ cast_data(in_data, getTensorData<int32_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::S64:
+ cast_data(in_data, getTensorData<int64_t>(out_tensor), elements_count);
+ break;
+ case loco::DataType::FLOAT32:
+ cast_data(in_data, getTensorData<float>(out_tensor), elements_count);
+ break;
+ case loco::DataType::BOOL:
+ cast_data(in_data, getTensorData<bool>(out_tensor), elements_count);
+ break;
+ default:
+ throw std::runtime_error("Unsupported output type.");
+ }
+}
+
+void cast_from_tensor_to_tensor(const Tensor *in_tensor, Tensor *out_tensor)
+{
+ auto in_type = in_tensor->element_type();
+
+ switch (in_type)
+ {
+ case loco::DataType::U8:
+ cast_from_pointer_to_tensor(getTensorData<uint8_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::U16:
+ cast_from_pointer_to_tensor(getTensorData<uint16_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::U32:
+ cast_from_pointer_to_tensor(getTensorData<uint32_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::U64:
+ cast_from_pointer_to_tensor(getTensorData<uint64_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::S8:
+ cast_from_pointer_to_tensor(getTensorData<int8_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::S16:
+ cast_from_pointer_to_tensor(getTensorData<int16_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::S32:
+ cast_from_pointer_to_tensor(getTensorData<int32_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::S64:
+ cast_from_pointer_to_tensor(getTensorData<int64_t>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::FLOAT32:
+ cast_from_pointer_to_tensor(getTensorData<float>(in_tensor), out_tensor);
+ break;
+ case loco::DataType::BOOL:
+ cast_from_pointer_to_tensor(getTensorData<bool>(in_tensor), out_tensor);
+ break;
+ default:
+ throw std::runtime_error("Unsupported input type.");
+ }
+}
+
+} // namespace
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Cast::Cast(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Cast::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() != loco::DataType::Unknown);
+ LUCI_INTERPRETER_CHECK(output()->element_type() != loco::DataType::Unknown);
+
+ const Shape &shape = input()->shape();
+ output()->resize(shape);
+}
+
+void Cast::execute() const
+{
+ assert(input()->shape().num_elements() == output()->shape().num_elements());
+
+ cast_from_tensor_to_tensor(input(), output());
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
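
The double dispatch above (first on the input type, then on the output type) funnels every type pair into cast_data, whose conversion is a plain static_cast. That fixes the semantics the tests rely on, for example:

    static_cast<bool>(7.0f);   // true: any nonzero value casts to true
    static_cast<int8_t>(9.0f); // 9: float-to-int conversion truncates toward zero
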
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Cast.h b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.h
new file mode 100644
index 000000000..f0bd02037
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_CAST_H
+#define LUCI_INTERPRETER_KERNELS_CAST_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Cast : public Kernel
+{
+public:
+ Cast(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_CAST_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Cast.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.test.cpp
new file mode 100644
index 000000000..4713ad34c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Cast.test.cpp
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Cast.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T1, typename T2>
+void Check(std::initializer_list<int32_t> shape, std::initializer_list<T1> input_data,
+ std::initializer_list<T2> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType input_type = getElementType<T1>();
+ constexpr DataType output_type = getElementType<T2>();
+
+ Tensor input_tensor = makeInputTensor<input_type>(shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(output_type);
+
+ Cast kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), shape);
+}
+
+template <typename T>
+void CheckBoolTo(std::initializer_list<int32_t> shape, std::initializer_list<bool> input_data,
+ std::initializer_list<T> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType input_type = loco::DataType::BOOL;
+ constexpr DataType output_type = getElementType<T>();
+ std::vector<typename DataTypeImpl<input_type>::Type> input_data_converted;
+ for (auto elem : input_data)
+ {
+ input_data_converted.push_back(elem);
+ }
+
+ Tensor input_tensor =
+ makeInputTensor<input_type>(shape, input_data_converted, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(output_type);
+
+ Cast kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), shape);
+}
+
+template <typename T> class CastTest : public ::testing::Test
+{
+};
+
+using IntDataTypes =
+ ::testing::Types<uint8_t, uint16_t, uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t>;
+TYPED_TEST_SUITE(CastTest, IntDataTypes);
+
+TYPED_TEST(CastTest, FloatToInt)
+{
+ Check<float, TypeParam>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1.0f, 9.0f, 7.0f, 3.0f, //
+ },
+ /*output_data=*/
+ {
+ 1, 9, 7, 3, //
+ });
+ SUCCEED();
+}
+
+TYPED_TEST(CastTest, IntToFloat)
+{
+ Check<TypeParam, float>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1, 9, 7, 3, //
+ },
+ /*output_data=*/
+ {
+ 1.0f, 9.0f, 7.0f, 3.0f, //
+ });
+ SUCCEED();
+}
+
+template <typename T1, typename T2> void check_int()
+{
+ Check<T1, T2>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1, 9, 7, 3, //
+ },
+ /*output_data=*/
+ {
+ 1, 9, 7, 3, //
+ });
+ SUCCEED();
+}
+
+TYPED_TEST(CastTest, IntToInt)
+{
+ check_int<TypeParam, uint8_t>();
+ check_int<TypeParam, uint16_t>();
+ check_int<TypeParam, uint32_t>();
+ check_int<TypeParam, uint64_t>();
+ check_int<TypeParam, int8_t>();
+ check_int<TypeParam, int16_t>();
+ check_int<TypeParam, int32_t>();
+ check_int<TypeParam, int64_t>();
+ SUCCEED();
+}
+
+TYPED_TEST(CastTest, IntToBool)
+{
+ Check<TypeParam, bool>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1, 0, 7, 0, //
+ },
+ /*output_data=*/
+ {
+ true, false, true, false, //
+ });
+ SUCCEED();
+}
+
+TYPED_TEST(CastTest, BoolToInt)
+{
+ CheckBoolTo<TypeParam>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ true, false, false, true, //
+ },
+ /*output_data=*/
+ {
+ 1, 0, 0, 1, //
+ });
+ SUCCEED();
+}
+
+TEST(CastTest, FloatToBool)
+{
+ Check<float, bool>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1.0f, 0.0f, 7.0f, 0.0f, //
+ },
+ /*output_data=*/
+ {
+ true, false, true, false, //
+ });
+ SUCCEED();
+}
+
+TEST(CastTest, BoolToFloat)
+{
+ CheckBoolTo<float>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ true, false, false, true, //
+ },
+ /*output_data=*/
+ {
+ 1.0f, 0.0f, 0.0f, 1.0f, //
+ });
+ SUCCEED();
+}
+
+TEST(CastTest, FloatToFloat)
+{
+ Check<float, float>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ 1.0f, 0.0f, 7.0f, 0.0f, //
+ },
+ /*output_data=*/
+ {
+ 1.0f, 0.0f, 7.0f, 0.0f, //
+ });
+ SUCCEED();
+}
+
+TEST(CastTest, BoolToBool)
+{
+ CheckBoolTo<bool>(/*shape=*/{1, 1, 1, 4},
+ /*input_data=*/
+ {
+ true, true, false, false, //
+ },
+ /*output_data=*/
+ {
+ true, true, false, false, //
+ });
+ SUCCEED();
+}
+
+TEST(CastTest, UnsupportedType_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 2, 4},
+ {
+ 1, 2, 7, 8, //
+ 1, 9, 7, 3, //
+ },
+ memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::Unknown);
+
+ Cast kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+ SUCCEED();
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.cpp
new file mode 100644
index 000000000..46ee5941e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Concatenation.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/concatenation.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Concatenation::Concatenation(std::vector<const Tensor *> inputs, Tensor *output,
+ const ConcatenationParams &params)
+ : KernelWithParams<ConcatenationParams>(std::move(inputs), {output}, params)
+{
+}
+
+void Concatenation::configure()
+{
+ const int num_inputs = _inputs.size();
+ LUCI_INTERPRETER_CHECK(num_inputs > 0);
+ const Tensor *t0 = _inputs[0];
+
+ // TODO: Support concat with fused activation function
+ LUCI_INTERPRETER_CHECK(params().activation == luci::FusedActFunc::NONE);
+
+ int axis = _params.axis;
+ if (axis < 0)
+ axis += t0->shape().num_dims();
+ LUCI_INTERPRETER_CHECK(axis >= 0 && axis < t0->shape().num_dims());
+
+ int32_t sum_axis = t0->shape().dim(axis);
+ for (int i = 1; i < num_inputs; ++i)
+ {
+ const Tensor *tensor = _inputs[i];
+ LUCI_INTERPRETER_CHECK(tensor->element_type() == t0->element_type());
+ LUCI_INTERPRETER_CHECK(tensor->shape().num_dims() == t0->shape().num_dims());
+ for (int d = 0; d < t0->shape().num_dims(); ++d)
+ {
+ if (d == axis)
+ {
+ sum_axis += tensor->shape().dim(axis);
+ }
+ else
+ {
+ LUCI_INTERPRETER_CHECK(tensor->shape().dim(d) == t0->shape().dim(d));
+ }
+ }
+ }
+
+ Shape output_shape = t0->shape();
+ output_shape.dim(axis) = sum_axis;
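+ // Illustrative example: concatenating two {2, 3} tensors along axis 0
+ // yields {4, 3}; along axis 1 it yields {2, 6}.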
+
+ // If the input tensors are of INT8 type, the quantization parameters of all
+ // input tensors and of the output must be the same.
+ for (auto current_tensor : _inputs)
+ {
+ if (current_tensor->element_type() == DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(current_tensor->quantized_dimension() ==
+ output()->quantized_dimension());
+
+ LUCI_INTERPRETER_CHECK(current_tensor->zero_points().size() ==
+ current_tensor->scales().size());
+ LUCI_INTERPRETER_CHECK(current_tensor->zero_points() == output()->zero_points());
+ LUCI_INTERPRETER_CHECK(current_tensor->scales() == output()->scales());
+ }
+ }
+ output()->resize(output_shape);
+}
+
+void Concatenation::execute() const
+{
+ switch (_inputs[0]->element_type())
+ {
+ case DataType::FLOAT32:
+ evalGeneric<float>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S8:
+ evalGeneric<int8_t>();
+ break;
+ case DataType::S32:
+ evalGeneric<int32_t>();
+ break;
+ case DataType::S64:
+ evalGeneric<int64_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> void Concatenation::evalGeneric() const
+{
+ int axis = _params.axis;
+ if (axis < 0)
+ axis += output()->shape().num_dims();
+
+ VectorOfTensors<T, true> inputs(_inputs);
+ tflite::ConcatenationParams params{};
+ params.axis = axis;
+ params.inputs_count = _inputs.size();
+ tflite::reference_ops::Concatenation(params, inputs.shapes(), inputs.data(),
+ getTensorShape(output()), getTensorData<T>(output()));
+}
+
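+// The U8 path below delegates to ConcatenationWithScaling, which (roughly
+// speaking) requantizes each input from its own (scale, zero_point) to the
+// output quantization while copying, so the inputs need not share
+// quantization parameters the way the S8 path requires.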
+void Concatenation::evalQuantized() const
+{
+ int axis = _params.axis;
+ if (axis < 0)
+ axis += output()->shape().num_dims();
+
+ VectorOfQuantizedTensors<true> inputs(_inputs);
+ tflite::ConcatenationParams params{};
+ params.axis = axis;
+ params.input_zeropoint = inputs.zero_point();
+ params.input_scale = inputs.scale();
+ params.inputs_count = _inputs.size();
+ params.output_zeropoint = output()->zero_point();
+ params.output_scale = output()->scale();
+
+ tflite::reference_ops::ConcatenationWithScaling(params, inputs.shapes(), inputs.data(),
+ getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.h b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.h
new file mode 100644
index 000000000..b48c8ed1e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_CONCATENATION_H
+#define LUCI_INTERPRETER_KERNELS_CONCATENATION_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Concatenation : public KernelWithParams<ConcatenationParams>
+{
+public:
+ Concatenation(std::vector<const Tensor *> inputs, Tensor *output,
+ const ConcatenationParams &params);
+
+ const Tensor *input(int index) const { return _inputs[index]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void evalGeneric() const;
+ void evalQuantized() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_CONCATENATION_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.test.cpp
new file mode 100644
index 000000000..f893b38fd
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Concatenation.test.cpp
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Concatenation.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ConcatenationTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(ConcatenationTest, Float)
+{
+ std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+ std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ ConcatenationParams params{};
+
+ // Try different 'axis' and expect different results.
+ {
+ params.axis = 0;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ kernel.configure();
+ for (auto t : kernel.getOutputTensors())
+ {
+ _memory_manager->allocate_memory(*t);
+ }
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}));
+ }
+ {
+ params.axis = -2; // Same as '0'.
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}));
+ }
+ {
+ params.axis = 1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}));
+ }
+ {
+ params.axis = -1; // Same as '1'.
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}));
+ }
+}
+
+TEST_F(ConcatenationTest, Input_Number_Check_NEG)
+{
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ ConcatenationParams params{};
+
+ params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Invalid_Axis_NEG)
+{
+ std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+ std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ ConcatenationParams params{};
+
+ params.axis = -3;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Mismatching_Input_Type_NEG)
+{
+ std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+ std::vector<uint8_t> input2_data{7, 8, 9, 10, 11, 12};
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>({2, 3}, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ ConcatenationParams params{};
+
+ params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Mismatching_Input_Dimension_Num_NEG)
+{
+ std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+ std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 3}, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ ConcatenationParams params{};
+
+ params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Mismatching_Input_Dimension_NEG)
+{
+ std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+ std::vector<float> input2_data{7, 8, 9, 10, 11, 12, 13, 14, 15};
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>({3, 3}, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ ConcatenationParams params{};
+
+ params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Int8_Mismatching_Input_Type_NEG)
+{
+ std::vector<uint8_t> input1_data{1, 2, 3, 4};
+ std::vector<int8_t> input2_data{5, 6, 7, 8};
+ Tensor input1_tensor = makeInputTensor<DataType::U8>({2, 2}, input1_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S8>({2, 2}, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8);
+ ConcatenationParams params{};
+
+ params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Int8_Mismatching_Input_Output_Quant_Params_NEG)
+{
+ std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+ std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+ int quantized_dimension = 3;
+ std::vector<float> scales{0.1, 0.2, 0.3};
+ std::vector<int32_t> zero_points{1, -1, 1};
+
+ Tensor input1_tensor = makeInputTensor<DataType::S8>(
+ {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input1_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S8>(
+ {1, 1, 2, 3}, scales, zero_points, quantized_dimension, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, scales.at(0), zero_points.at(0));
+ ConcatenationParams params{};
+
+ params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ConcatenationTest, Int8_Mismatching_Zero_Point_NEG)
+{
+ std::vector<float> input1_data{1, 2, 3, 4};
+ std::vector<float> input2_data{5, 6, 7, 8};
+ float scale = 0.1;
+ int32_t zero_point_1 = 1;
+ int32_t zero_point_2 = -1;
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_1, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::S8>({2, 2}, scale, zero_point_2, input2_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::S8, scale, zero_point_1);
+ ConcatenationParams params{};
+
+ params.axis = -1;
+ params.activation = luci::FusedActFunc::NONE;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+// TODO: Remove this test when concat w/ fused_activation is supported
+TEST_F(ConcatenationTest, With_Fused_Activation_NEG)
+{
+ std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
+ std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ ConcatenationParams params{};
+
+ params.axis = 1;
+ params.activation = luci::FusedActFunc::RELU;
+
+ Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.cpp
new file mode 100644
index 000000000..234f95425
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.cpp
@@ -0,0 +1,456 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Conv2D.h"
+
+#include "kernels/Utils.h"
+
+#include "PALConv2d.h"
+
+#include <stdexcept>
+#include <thread>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Conv2D::Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
+ Tensor *scratchpad, const Conv2DParams &params)
+ : KernelWithParams<Conv2DParams>({input, filter, bias}, {output, scratchpad}, params)
+{
+}
+
+void Conv2D::configure()
+{
+ // TensorFlow Lite (as of v2.2.0) supports the following combinations of types:
+ // | input filter bias output |
+ // ----+---------------------------+
+ // (1) | float float float float |
+ // (2) | float int8 float float | hybrid
+ // (3) | uint8 uint8 int32 uint8 | quantized
+ // (4) | int8 int8 int32 int8 | quantized per channel
+ //
+ // We only support (1), (3) and (4) for now, and additionally the following:
+ // | input filter bias output |
+ // ----+---------------------------+
+ // (5) | int16 int16 int64 int16 |
+ //
+ if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32)
+ {
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
+ }
+ else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
+ }
+ else if (input()->element_type() == DataType::S8 && filter()->element_type() == DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
+ LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+ static_cast<size_t>(filter()->shape().dim(0)));
+ for (auto zerop : filter()->zero_points())
+ {
+ LUCI_INTERPRETER_CHECK(zerop == 0);
+ }
+ }
+ else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64);
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported type.");
+ }
+ LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type());
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t output_depth = filter_shape.dim(0);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ LUCI_INTERPRETER_CHECK(filter_shape.dim(3) == input_shape.dim(3));
+
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 &&
+ bias()->shape().dim(0) == output_depth));
+
+ const int32_t output_height =
+ computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
+ _params.dilation_height_factor);
+ const int32_t output_width =
+ computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
+ _params.dilation_width_factor);
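+
+ // Illustrative check: with VALID padding the output size is
+ // (input - effective_filter) / stride + 1, where
+ // effective_filter = (filter - 1) * dilation + 1; e.g. input_height 4,
+ // filter_height 2, stride 2, dilation 1 gives (4 - 2) / 2 + 1 = 2.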
+
+ _padding_height = computePadding(_params.stride_height, _params.dilation_height_factor,
+ input_height, filter_height, output_height);
+ _padding_width = computePadding(_params.stride_width, _params.dilation_width_factor, input_width,
+ filter_width, output_width);
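+ // A sketch of the convention computePadding is assumed to follow (the usual
+ // TFLite formula): padding = max(0, ((output - 1) * stride
+ // + (filter - 1) * dilation + 1 - input) / 2), which is 0 for VALID padding.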
+
+ output()->resize({batches, output_height, output_width, output_depth});
+
+ // Allocate tensor for scratchpad, if needed.
+ tflite::ConvParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+ auto scratchpad = getOutputTensors()[1];
+ luci_interpreter_pal::SetupScratchpadTensor(scratchpad, input()->element_type(), params,
+ getTensorShape(input()), getTensorShape(filter()),
+ getTensorShape(output()));
+
+ switch (_params.activation)
+ {
+ case Activation::NONE:
+ case Activation::RELU:
+ case Activation::RELU6:
+ case Activation::RELU_N1_TO_1:
+ break;
+ default:
+ throw std::runtime_error("Unsupported fused activation");
+ }
+}
+
+void Conv2D::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ if (filter()->element_type() == DataType::FLOAT32)
+ {
+ evalFloat();
+ break;
+ }
+ throw std::runtime_error("Unsupported type.");
+ case DataType::U8:
+ if (filter()->scales().size() == 1)
+ {
+ evalQuantized();
+ }
+ else if (filter()->scales().size() > 1)
+ {
+ LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+ static_cast<size_t>(filter()->shape().dim(0)));
+ evalQuantizedPerChannel();
+ }
+ break;
+ case DataType::S8:
+ evalQuantizedS8PerChannel();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Conv2D::evalFloat() const
+{
+ float activation_min{};
+ float activation_max{};
+ calculateActivationRange(_params.activation, &activation_min, &activation_max);
+
+ tflite::ConvParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+ params.float_activation_min = activation_min;
+ params.float_activation_max = activation_max;
+
+ auto scratchpad = getOutputTensors()[1];
+ float *scratchpad_data = nullptr;
+ if (scratchpad->is_allocatable())
+ scratchpad_data = scratchpad->data<float>();
+
+ luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(filter()), getTensorData<float>(filter()),
+ getTensorShape(bias()), getTensorData<float>(bias()),
+ getTensorShape(output()), getTensorData<float>(output()),
+ getTensorShape(scratchpad), scratchpad_data);
+}
+
+void Conv2D::evalQuantized() const
+{
+ const auto input_scale = static_cast<double>(input()->scale());
+ const auto filter_scale = static_cast<double>(filter()->scale());
+ const auto output_scale = static_cast<double>(output()->scale());
+
+ const double real_multiplier = input_scale * filter_scale / output_scale;
+ int32_t output_multiplier{};
+ int output_shift{};
+ quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
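+ // quantizeMultiplier decomposes real_multiplier into a 32-bit fixed-point
+ // multiplier and a power-of-two shift, so that, roughly,
+ // real_multiplier ~= output_multiplier * 2^output_shift / 2^31.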
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::ConvParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+ // The kernel expects input and filter zero points to be negated.
+ params.input_offset = -input()->zero_point(); // Note the '-'.
+ params.weights_offset = -filter()->zero_point(); // Note the '-'.
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = output_multiplier;
+ params.output_shift = output_shift;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ auto scratchpad = getOutputTensors()[1];
+ luci_interpreter_pal::Conv(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(filter()), getTensorData<uint8_t>(filter()),
+ getTensorShape(bias()), getTensorData<int32_t>(bias()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()),
+ getTensorShape(scratchpad), getTensorData<uint8_t>(scratchpad));
+}
+
+void Conv2D::evalQuantizedPerChannel() const
+{
+ const auto *input_data = getTensorData<uint8_t>(input());
+ const auto *filter_data = getTensorData<uint8_t>(filter());
+ const auto *bias_data = getTensorData<int32_t>(bias());
+ auto *output_data = getTensorData<uint8_t>(output());
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t input_depth = input_shape.dim(3);
+ const int32_t output_depth = filter_shape.dim(0);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t output_height = output_shape.dim(1);
+ const int32_t output_width = output_shape.dim(2);
+
+ const int32_t stride_height = _params.stride_height;
+ const int32_t stride_width = _params.stride_width;
+ const int32_t dilation_height_factor = _params.dilation_height_factor;
+ const int32_t dilation_width_factor = _params.dilation_width_factor;
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ const std::vector<double> effective_output_scale =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ const std::vector<ChannelQuantMultipliers> multipliers_raw =
+ quantizeMultipliers(effective_output_scale);
+ BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(multipliers_raw);
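+
+ // Reference per-channel convolution: each output element accumulates
+ // (input - input zero point) * (filter - per-channel filter zero point)
+ // over the receptive field, adds the bias, then rescales the accumulator
+ // with the per-channel multiplier/shift pair and clamps it to the
+ // activation range.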
+
+ for (int32_t batch = 0; batch < batches; ++batch)
+ {
+ for (int32_t out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int32_t out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ const int32_t in_y_origin = out_y * stride_height - _padding_height;
+ const int32_t in_x_origin = out_x * stride_width - _padding_width;
+ int32_t acc = 0;
+ for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
+ const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
+ if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
+ {
+ for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+ {
+ const uint8_t input_val =
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ const uint8_t filter_val =
+ filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+ acc += static_cast<int32_t>(input_val - input()->zero_point()) *
+ static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]);
+ }
+ }
+ }
+ }
+ if (bias_data)
+ {
+ acc += bias_data[out_c];
+ }
+
+ int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+ acc, quant_multipliers[out_c].multiplier, quant_multipliers[out_c].shift);
+
+ scaled_acc += output()->zero_point();
+ scaled_acc = std::max(scaled_acc, activation_min);
+ scaled_acc = std::min(scaled_acc, activation_max);
+ output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
+ }
+ }
+ }
+ }
+}
+
+void Conv2D::evalQuantizedS8PerChannel() const
+{
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::ConvParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+ // The kernel expects filter zero points to be negated.
+ params.input_offset = -input()->zero_point(); // Note the '-'.
+ params.weights_offset = 0; // Unused in tflite code
+ params.output_offset = output()->zero_point();
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ const std::vector<double> effective_output_scales =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ std::vector<ChannelQuantMultipliers> quant_multipliers =
+ quantizeMultipliers(effective_output_scales);
+
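+ // The PAL entry point takes raw multiplier and shift arrays, so the
+ // ChannelQuantMultipliers structs are unzipped into two parallel vectors.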
+ std::vector<int32_t> shifts;
+ std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts),
+ [](ChannelQuantMultipliers cm) { return cm.shift; });
+ std::vector<int32_t> multipliers;
+ std::transform(quant_multipliers.begin(), quant_multipliers.end(),
+ std::back_inserter(multipliers),
+ [](ChannelQuantMultipliers cm) { return cm.multiplier; });
+
+ auto scratchpad = getOutputTensors()[1];
+ int8_t *scratchpad_data = nullptr;
+ if (scratchpad->is_allocatable())
+ scratchpad_data = scratchpad->data<int8_t>();
+
+ luci_interpreter_pal::ConvPerChannel(
+ params, multipliers.data(), shifts.data(), getTensorShape(input()),
+ getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()),
+ getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()),
+ getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
+}
+
+void Conv2D::evalQuantizedS16() const
+{
+ const auto *input_data = getTensorData<int16_t>(input());
+ const auto *filter_data = getTensorData<int16_t>(filter());
+ const auto *bias_data = getTensorData<int64_t>(bias());
+ auto *output_data = getTensorData<int16_t>(output());
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t input_depth = input_shape.dim(3);
+ const int32_t output_depth = filter_shape.dim(0);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t output_height = output_shape.dim(1);
+ const int32_t output_width = output_shape.dim(2);
+
+ const int32_t stride_height = _params.stride_height;
+ const int32_t stride_width = _params.stride_width;
+ const int32_t dilation_height_factor = _params.dilation_height_factor;
+ const int32_t dilation_width_factor = _params.dilation_width_factor;
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ const std::vector<double> effective_output_scale =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ const std::vector<ChannelQuantMultipliers> multipliers_raw =
+ quantizeMultipliers(effective_output_scale);
+ BroadcastableWrapper<ChannelQuantMultipliers> multipliers(multipliers_raw);
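+
+ // S16 tensors are symmetrically quantized (zero points are assumed zero),
+ // so products are accumulated directly; the 64-bit accumulator avoids
+ // overflow of the 16x16-bit products summed over the receptive field.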
+
+ for (int32_t batch = 0; batch < batches; ++batch)
+ {
+ for (int32_t out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int32_t out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ const int32_t in_y_origin = out_y * stride_height - _padding_height;
+ const int32_t in_x_origin = out_x * stride_width - _padding_width;
+ int64_t acc = 0;
+ for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
+ const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
+ if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
+ {
+ for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+ {
+ const int16_t input_val =
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ const int16_t filter_val =
+ filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+ acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
+ }
+ }
+ }
+ }
+ if (bias_data)
+ {
+ acc += bias_data[out_c];
+ }
+
+ int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+ acc, multipliers[out_c].multiplier, multipliers[out_c].shift);
+
+ scaled_acc = std::max(scaled_acc, activation_min);
+ scaled_acc = std::min(scaled_acc, activation_max);
+
+ output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
+ }
+ }
+ }
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.h
new file mode 100644
index 000000000..330bf3a2a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_CONV2D_H
+#define LUCI_INTERPRETER_KERNELS_CONV2D_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <memory>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Conv2D : public KernelWithParams<Conv2DParams>
+{
+public:
+ Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
+ Tensor *scratchpad, const Conv2DParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *filter() const { return _inputs[1]; }
+ const Tensor *bias() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void evalQuantizedPerChannel() const;
+ void evalQuantizedS8PerChannel() const;
+ void evalQuantizedS16() const;
+
+private:
+ int32_t _padding_height{};
+ int32_t _padding_width{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_CONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.test.cpp
new file mode 100644
index 000000000..0fe6ef795
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Conv2D.test.cpp
@@ -0,0 +1,707 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Conv2D.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class Conv2DTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(Conv2DTest, Float)
+{
+ Shape input_shape{1, 4, 3, 2};
+ Shape filter_shape{2, 2, 2, 2};
+ Shape bias_shape{2};
+ std::vector<float> input_data{
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
+ };
+ std::vector<float> filter_data{
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
+ };
+ std::vector<float> bias_data{1, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(im2col);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 11, 16, 7, 20, // row = 0
+ 0, 40, 0, 44, // row = 1
+ };
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 2};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(Conv2DTest, FloatPointwise)
+{
+ Shape input_shape{1, 2, 2, 2};
+ Shape filter_shape{2, 1, 1, 2};
+ Shape bias_shape{2};
+ std::vector<float> input_data{
+ 1, 2, // row = 0, col = 0
+ 3, 4, // row = 0, col = 1
+ 5, 6, // row = 1, col = 0
+ 7, 8, // row = 1, col = 1
+ };
+ std::vector<float> filter_data{
+ -1, 2, // out = 0
+ -3, 4, // out = 1
+ };
+ std::vector<float> bias_data{1, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 1;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(im2col);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 4, 7, 6, 9, // row = 0
+ 8, 11, 10, 13, // row = 1
+ };
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 2};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(Conv2DTest, FloatCheck)
+{
+ Shape input_shape{2, 2, 4, 1};
+ Shape filter_shape{3, 2, 2, 1};
+ Shape bias_shape{3};
+ std::vector<float> input_data{
+ // First batch
+ 1, 1, 1, 1, // row = 1
+ 2, 2, 2, 2, // row = 2
+ // Second batch
+ 1, 2, 3, 4, // row = 1
+ 1, 2, 3, 4, // row = 2
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, // first 2x2 filter
+ -1, 1, -1, 1, // second 2x2 filter
+ -1, -1, 1, 1, // third 2x2 filter
+ };
+ std::vector<float> bias_data{1, 2, 3};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::NONE;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(im2col);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 18, 2, 5, // first batch, left
+ 18, 2, 5, // first batch, right
+ 17, 4, 3, // second batch, left
+ 37, 4, 3, // second batch, right
+ };
+ std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(Conv2DTest, Uint8)
+{
+ std::vector<float> input_data{
+ // First batch
+ 1, 1, 1, 1, // row = 1
+ 2, 2, 2, 2, // row = 2
+ // Second batch
+ 1, 2, 3, 4, // row = 1
+ 1, 2, 3, 4, // row = 2
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, // first 2x2 filter
+ -1, 1, -1, 1, // second 2x2 filter
+ -1, -1, 1, 1, // third 2x2 filter
+ };
+ std::vector<float> bias_data{1, 2, 3};
+
+ std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
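+
+ // quantizationParams<T>(min, max) is a test helper that (roughly) derives a
+ // (scale, zero_point) pair mapping the float interval [min, max] onto the
+ // representable range of T.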
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::U8>({3, 2, 2, 1}, input_quant_param.first, input_quant_param.second,
+ filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(
+ {3}, input_quant_param.first * input_quant_param.first, 0, bias_data, _memory_manager.get());
+ Tensor im2col(DataType::U8, Shape({}), {}, "");
+ Tensor output_tensor =
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::NONE;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(im2col);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 18, 2, 5, // first batch, left
+ 18, 2, 5, // first batch, right
+ 17, 4, 3, // second batch, left
+ 37, 4, 3, // second batch, right
+ };
+ std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(Conv2DTest, Uint8_CWQ)
+{
+ const int output_channels = 3;
+ std::vector<float> input_data{
+ // First batch
+ 1, 1, 1, 1, // row = 1
+ 2, 2, 2, 2, // row = 2
+ // Second batch
+ 1, 2, 3, 4, // row = 1
+ 1, 2, 3, 4, // row = 2
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, // first 2x2 filter
+ -1, 1, -1, 1, // second 2x2 filter
+ -1, -1, 1, 1, // third 2x2 filter
+ };
+ std::vector<float> bias_data{1, 2, 3};
+ Shape filter_shape{output_channels, 2, 2, 1};
+
+ std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(0, 4);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
+
+ std::vector<std::pair<float, int32_t>> filter_quant_params;
+ filter_quant_params.push_back(quantizationParams<uint8_t>(0, 4));
+ filter_quant_params.push_back(quantizationParams<uint8_t>(-1, 1));
+ filter_quant_params.push_back(quantizationParams<uint8_t>(-1, 1));
+
+ std::vector<float> filter_scales;
+ std::vector<int32_t> filter_zerops;
+ for (auto iter : filter_quant_params)
+ {
+ filter_scales.push_back(iter.first);
+ filter_zerops.push_back(iter.second);
+ }
+
+ std::vector<float> bias_scales;
+ for (int i = 0; i < output_channels; ++i)
+ bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
+ std::vector<int32_t> zerop(output_channels, 0);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops,
+ 0, filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0,
+ bias_data, _memory_manager.get());
+ Tensor im2col(DataType::U8, Shape({}), {}, "");
+ Tensor output_tensor =
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::NONE;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(im2col);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 18, 2, 5, // first batch, left
+ 18, 2, 5, // first batch, right
+ 17, 4, 3, // second batch, left
+ 37, 4, 3, // second batch, right
+ };
+ std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(Conv2DTest, SInt8_CWQ)
+{
+ const int output_channels = 3;
+ std::vector<float> input_data{
+ // First batch
+ 1, 1, 1, 1, // row = 1
+ 2, 2, 2, 2, // row = 2
+ // Second batch
+ 1, 2, 3, 4, // row = 1
+ 1, 2, 3, 4, // row = 2
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, // first 2x2 filter
+ -1, 1, -1, 1, // second 2x2 filter
+ -1, -1, 1, 1, // third 2x2 filter
+ };
+ std::vector<float> bias_data{1, 2, 3};
+ Shape filter_shape{output_channels, 2, 2, 1};
+
+ std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(0, 4);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
+
+ std::vector<std::pair<float, int32_t>> filter_quant_params;
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.5, 0));
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.25, 0));
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.125, 0));
+
+ std::vector<float> filter_scales;
+ std::vector<int32_t> filter_zerops;
+ for (auto iter : filter_quant_params)
+ {
+ filter_scales.push_back(iter.first);
+ filter_zerops.push_back(iter.second);
+ }
+
+ std::vector<float> bias_scales;
+ for (int i = 0; i < output_channels; ++i)
+ bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
+ std::vector<int32_t> zerop(output_channels, 0);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>({2, 2, 4, 1}, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::S8>(filter_shape, filter_scales, filter_zerops,
+ 0, filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0,
+ bias_data, _memory_manager.get());
+ Tensor im2col(DataType::S8, Shape({}), {}, "");
+ Tensor output_tensor =
+ makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::NONE;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(im2col);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 18, 2, 5, // first batch, left
+ 18, 2, 5, // first batch, right
+ 17, 4, 3, // second batch, left
+ 37, 4, 3, // second batch, right
+ };
+ std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(Conv2DTest, SInt16)
+{
+ Shape input_shape{1, 4, 3, 2};
+ Shape filter_shape{2, 2, 2, 2};
+ Shape bias_shape{2};
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 2};
+
+ std::vector<float> input_data{
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
+ };
+ std::vector<float> filter_data{
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
+ };
+ std::vector<float> bias_data{1, 2};
+ std::vector<float> ref_output_data{
+ 11, 16, 7, 20, // row = 0
+ 0, 40, 0, 44, // row = 1
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get());
+ Tensor im2col(DataType::S16, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(im2col);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(Conv2DTest, SInt16_CWQ_weights)
+{
+ Shape input_shape{1, 2, 2, 2}; // Batch x H x W x C
+ Shape filter_shape{3, 1, 1, 2}; // Out channels x H x W x In Channels
+ Shape bias_shape{3};
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 3};
+
+ std::vector<float> input_data{
+ 1, 2, // row = 0, col 0
+ 3, 4, // row = 0, col 1
+ 5, 6, // row = 1, col 0
+ 7, 8, // row = 1, col 1
+ };
+ std::vector<float> filter_data{
+ 4, -3, // out = 0
+ 1, -3, // out = 1
+ 5, -3, // out = 2
+ };
+ std::vector<float> bias_data{1, 10, 5};
+ std::vector<float> ref_output_data{
+ 0, 5, 4, // row 0, col 0
+ 1, 1, 8, // row 0, col 1
+ 3, 0, 12, // row 1, col 0
+ 5, 0, 16, // row 1, col 1
+ };
+
+ float input_scale = 0.25f;
+ float output_scale = 0.05f;
+ std::vector<float> filter_scales = {0.25f, 0.2f, 0.1f};
+ std::vector<float> bias_scales;
+ for (size_t i = 0; i < filter_scales.size(); ++i)
+ bias_scales.push_back(filter_scales[i] * input_scale);
+ std::vector<int32_t> zerop = {0, 0, 0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, _memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0,
+ filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
+ _memory_manager.get());
+ Tensor im2col(DataType::S16, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 1;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(im2col);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(Conv2DTest, Unsupported_Type_Configure_NEG)
+{
+ Shape input_shape{1, 4, 3, 2};
+ Shape filter_shape{2, 2, 2, 2};
+ Shape bias_shape{2};
+ std::vector<int32_t> input_data{
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
+ };
+ std::vector<float> filter_data{
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
+ };
+ std::vector<float> bias_data{1, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(Conv2DTest, Invalid_Bias_Type_NEG)
+{
+ Shape input_shape{1, 4, 3, 2};
+ Shape filter_shape{2, 2, 2, 2};
+ Shape bias_shape{2};
+ std::vector<float> input_data{
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
+ };
+ std::vector<float> filter_data{
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
+ };
+ std::vector<uint8_t> bias_data{1, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::U8>(bias_shape, bias_data, _memory_manager.get());
+ Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(Conv2DTest, Invalid_Bias_Data_NEG)
+{
+ Shape input_shape{1, 4, 3, 2};
+ Shape filter_shape{2, 2, 2, 2};
+ Shape bias_shape{3};
+ std::vector<float> input_data{
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
+ };
+ std::vector<float> filter_data{
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
+ };
+ std::vector<float> bias_data{1, 2, 3};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(Conv2DTest, Invalid_Input_Shape_NEG)
+{
+ Shape input_shape{1, 4, 6, 1};
+ Shape filter_shape{2, 2, 2, 2};
+ Shape bias_shape{2};
+ std::vector<float> input_data{
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
+ };
+ std::vector<float> filter_data{
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
+ };
+ std::vector<float> bias_data{1, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(Conv2DTest, Invalid_fused_act_tanh_NEG)
+{
+ Shape input_shape{1, 4, 3, 2};
+ Shape filter_shape{2, 2, 2, 2};
+ Shape bias_shape{2};
+ std::vector<float> input_data{
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
+ };
+ std::vector<float> filter_data{
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
+ };
+ std::vector<float> bias_data{1, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor im2col(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::TANH;
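+  // TANH is not a supported fused activation for this kernel; configure() is expected to throw.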
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &im2col, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.cpp
new file mode 100644
index 000000000..3a9acd1d4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.cpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "DepthToSpace.h"
+#include "Utils.h"
+#include "PALDepthToSpace.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+DepthToSpace::DepthToSpace(const Tensor *input, Tensor *output, const DepthToSpaceParams &params)
+ : KernelWithParams<DepthToSpaceParams>({input}, {output}, params)
+{
+}
+
+void DepthToSpace::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+  LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32 ||
+                         output()->element_type() == DataType::U8);
+  LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ const int block_size = params().block_size;
+ const int32_t input_height = input()->shape().dim(1);
+ const int32_t input_width = input()->shape().dim(2);
+ const int32_t input_channels = input()->shape().dim(3);
+ int32_t output_height = input_height * block_size;
+ int32_t output_width = input_width * block_size;
+ int32_t output_channels = input_channels / block_size / block_size;
+
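+  // output_channels is computed with truncating integer division, so the last check below
+  // fails whenever input_channels is not divisible by block_size * block_size.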
+ LUCI_INTERPRETER_CHECK(input_height == output_height / block_size);
+ LUCI_INTERPRETER_CHECK(input_width == output_width / block_size);
+ LUCI_INTERPRETER_CHECK(input_channels == output_channels * block_size * block_size);
+
+ Shape output_shape(4);
+ output_shape.dim(0) = input()->shape().dim(0);
+ output_shape.dim(1) = output_height;
+ output_shape.dim(2) = output_width;
+ output_shape.dim(3) = output_channels;
+
+ output()->resize(output_shape);
+}
+
+void DepthToSpace::execute() const
+{
+ tflite::DepthToSpaceParams op_params;
+ op_params.block_size = params().block_size;
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ luci_interpreter_pal::DepthToSpace(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported Type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.h b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.h
new file mode 100644
index 000000000..63ce37610
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_DEPTHTOSPACE_H
+#define LUCI_INTERPRETER_KERNELS_DEPTHTOSPACE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class DepthToSpace : public KernelWithParams<DepthToSpaceParams>
+{
+public:
+ DepthToSpace(const Tensor *input, Tensor *output, const DepthToSpaceParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_DEPTHTOSPACE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.test.cpp
new file mode 100644
index 000000000..88e6e07f1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthToSpace.test.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/DepthToSpace.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T> class DepthToSpaceTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(DepthToSpaceTest, DataTypes);
+
+TYPED_TEST(DepthToSpaceTest, SimpleCase)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<TypeParam> input_data{1, 2, 3, 4, 5, 6, 7, 8};
+ Shape input_shape{1, 1, 2, 4};
+ std::vector<TypeParam> output_data{1, 2, 5, 6, 3, 4, 7, 8};
+ std::vector<int32_t> output_shape{1, 2, 4, 1};
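+  // With block_size = 2 the four channels of each input pixel unfold into a 2x2 spatial
+  // block: channels {1, 2, 3, 4} of pixel (0, 0) become rows [1, 2] and [3, 4].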
+
+ Tensor input_tensor =
+ makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
+
+ DepthToSpaceParams params{};
+ params.block_size = 2;
+
+ DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
+ ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(DepthToSpaceTest, InvalidInputShape_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8};
+ Shape input_shape{1, 2, 4};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ DepthToSpaceParams params{};
+ params.block_size = 2;
+
+ DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(DepthToSpaceTest, InOutTypeMismatch_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8};
+ Shape input_shape{1, 1, 2, 4};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ DepthToSpaceParams params{};
+ params.block_size = 2;
+
+ DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(DepthToSpaceTest, InvalidBlockSize_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8};
+ Shape input_shape{1, 1, 2, 4};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ DepthToSpaceParams params{};
+ params.block_size = 3;
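+  // 4 input channels are not divisible by block_size^2 = 9, so configure() must throw.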
+
+ DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
new file mode 100644
index 000000000..c554c309d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
@@ -0,0 +1,451 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/DepthwiseConv2D.h"
+
+#include "kernels/Utils.h"
+
+#include "PALDepthwiseConv2d.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+DepthwiseConv2D::DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias,
+ Tensor *output, Tensor *scratchpad,
+ const DepthwiseConv2DParams &params)
+ : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output, scratchpad}, params)
+{
+}
+
+void DepthwiseConv2D::configure()
+{
+ // TensorFlow Lite (as of v2.2.0) supports the following combinations of types:
+ // | input filter bias output |
+ // ----+---------------------------+
+ // (1) | float float float float |
+ // (2) | float int8 float float | hybrid
+ // (3) | uint8 uint8 int32 uint8 | quantized
+ // (4) | int8 int8 int32 int8 | quantized per channel
+ // (5) | int16 int8 int64 int16 | quantized per channel 16x8
+ //
+ // We only support (1), (3) and (4) for now, and additionally the following:
+ // | input filter bias output |
+ // ----+---------------------------+
+ // (6) | int16 int16 int64 int16 |
+ //
+ if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32)
+ {
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
+ }
+ else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
+ }
+ else if (input()->element_type() == DataType::S8 && filter()->element_type() == DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(static_cast<uint32_t>(filter()->shape().dim(3)) ==
+ filter()->scales().size());
+ for (auto zerop : filter()->zero_points())
+ {
+ LUCI_INTERPRETER_CHECK(zerop == 0);
+ }
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32);
+ }
+ else if (input()->element_type() == DataType::S16 && filter()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S64);
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported type.");
+ }
+ LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type());
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ // Filter format: [1, H, W, O].
+ LUCI_INTERPRETER_CHECK(filter_shape.dim(0) == 1);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t channels_out = filter_shape.dim(3);
+
+ LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 &&
+ bias()->shape().dim(0) == channels_out));
+
+ const int32_t output_height =
+ computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
+ _params.dilation_height_factor);
+ const int32_t output_width =
+ computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
+ _params.dilation_width_factor);
+
+ _padding_height = computePadding(_params.stride_height, _params.dilation_height_factor,
+ input_height, filter_height, output_height);
+ _padding_width = computePadding(_params.stride_width, _params.dilation_width_factor, input_width,
+ filter_width, output_width);
+
+ output()->resize({batches, output_height, output_width, channels_out});
+
+ tflite::DepthwiseParams params{};
+
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+
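+  // The second output tensor serves as a scratchpad; the PAL backend decides whether this
+  // configuration needs a temporary buffer and resizes the tensor accordingly.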
+ auto scratchpad = getOutputTensors()[1];
+ luci_interpreter_pal::SetupScratchpadTensor(scratchpad, params, input()->element_type(),
+ getTensorShape(input()), getTensorShape(filter()),
+ getTensorShape(output()));
+}
+
+void DepthwiseConv2D::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ if (filter()->element_type() == DataType::FLOAT32)
+ {
+ evalFloat();
+ break;
+ }
+ throw std::runtime_error("Unsupported type.");
+ case DataType::U8:
+ if (filter()->scales().size() == 1)
+ {
+ evalQuantized();
+ }
+ else if (filter()->scales().size() > 1)
+ {
+ LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+ static_cast<size_t>(filter()->shape().dim(3)));
+ evalQuantizedPerChannel();
+ }
+ break;
+ case DataType::S8:
+ evalQuantizedS8PerChannel();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void DepthwiseConv2D::evalFloat() const
+{
+ float activation_min{};
+ float activation_max{};
+ calculateActivationRange(_params.activation, &activation_min, &activation_max);
+
+ tflite::DepthwiseParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+ params.depth_multiplier = _params.depth_multiplier;
+ params.float_activation_min = activation_min;
+ params.float_activation_max = activation_max;
+
+ tflite::reference_ops::DepthwiseConv(
+ params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
+ getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void DepthwiseConv2D::evalQuantizedPerChannel() const
+{
+ const auto *input_data = getTensorData<uint8_t>(input());
+ const auto *filter_data = getTensorData<uint8_t>(filter());
+ const auto *bias_data = getTensorData<int32_t>(bias());
+ auto *output_data = getTensorData<uint8_t>(output());
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t input_depth = input_shape.dim(3);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t output_height = output_shape.dim(1);
+ const int32_t output_width = output_shape.dim(2);
+
+ const int32_t stride_height = _params.stride_height;
+ const int32_t stride_width = _params.stride_width;
+ const int32_t dilation_height_factor = _params.dilation_height_factor;
+ const int32_t dilation_width_factor = _params.dilation_width_factor;
+ const int32_t depth_multiplier = _params.depth_multiplier;
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ const std::vector<double> effective_output_scales =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
+ quantizeMultipliers(effective_output_scales);
+ BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);
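+  // Requantization sketch: for each output channel c, the real multiplier
+  //   input_scale * filter_scale[c] / output_scale
+  // has been decomposed above into a fixed-point multiplier and shift.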
+
+ for (int batch = 0; batch < batches; ++batch)
+ {
+ for (int out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int in_channel = 0; in_channel < input_depth; ++in_channel)
+ {
+ for (int m = 0; m < depth_multiplier; ++m)
+ {
+ const int output_channel = m + in_channel * depth_multiplier;
+ const int in_x_origin = (out_x * stride_width) - _padding_width;
+ const int in_y_origin = (out_y * stride_height) - _padding_height;
+ int32_t acc = 0;
+ for (int filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int in_x = in_x_origin + dilation_width_factor * filter_x;
+ const int in_y = in_y_origin + dilation_height_factor * filter_y;
+ // Zero padding by omitting the areas outside the image.
+ const bool is_point_inside_image =
+ (in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);
+ if (is_point_inside_image)
+ {
+ int32_t input_val =
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_channel)];
+ int32_t filter_val =
+ filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, output_channel)];
+ acc += (filter_val - filter()->zero_points()[output_channel]) *
+ (input_val - input()->zero_point());
+ }
+ }
+ }
+ if (bias_data)
+ {
+ acc += bias_data[output_channel];
+ }
+ int32_t output_multiplier = quant_multipliers[output_channel].multiplier;
+ int output_shift = quant_multipliers[output_channel].shift;
+ int32_t scaled_acc =
+ tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+ scaled_acc += output()->zero_point();
+ scaled_acc = std::max(scaled_acc, activation_min);
+ scaled_acc = std::min(scaled_acc, activation_max);
+ output_data[calcOffset(output_shape, batch, out_y, out_x, output_channel)] =
+ static_cast<uint8_t>(scaled_acc);
+ }
+ }
+ }
+ }
+ }
+}
+
+void DepthwiseConv2D::evalQuantized() const
+{
+ const auto input_scale = static_cast<double>(input()->scale());
+ const auto filter_scale = static_cast<double>(filter()->scale());
+ const auto output_scale = static_cast<double>(output()->scale());
+
+ const double real_multiplier = input_scale * filter_scale / output_scale;
+ int32_t output_multiplier{};
+ int output_shift{};
+ quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::DepthwiseParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+ params.depth_multiplier = _params.depth_multiplier;
+ // The kernel expects input and filter zero points to be negated.
+ params.input_offset = -input()->zero_point(); // Note the '-'.
+ params.weights_offset = -filter()->zero_point(); // Note the '-'.
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = output_multiplier;
+ params.output_shift = output_shift;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ tflite::reference_ops::DepthwiseConv(
+ params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()),
+ getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+void DepthwiseConv2D::evalQuantizedS8PerChannel() const
+{
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::DepthwiseParams params{};
+
+ params.padding_type = tflite::PaddingType::kSame;
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+ params.depth_multiplier = _params.depth_multiplier;
+ // The kernel expects input and filter zero points to be negated.
+ params.input_offset = -input()->zero_point(); // Note the '-'.
+ params.weights_offset = 0;
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = 1; // unused in tflite code
+ params.output_shift = 0; // unused in tflite code
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ const std::vector<double> effective_output_scales =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ std::vector<ChannelQuantMultipliers> quant_multipliers =
+ quantizeMultipliers(effective_output_scales);
+
+ std::vector<int32_t> shifts;
+ std::transform(quant_multipliers.begin(), quant_multipliers.end(), std::back_inserter(shifts),
+ [](ChannelQuantMultipliers cm) { return cm.shift; });
+ std::vector<int32_t> multipliers;
+ std::transform(quant_multipliers.begin(), quant_multipliers.end(),
+ std::back_inserter(multipliers),
+ [](ChannelQuantMultipliers cm) { return cm.multiplier; });
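+  // The per-channel multipliers and shifts are handed to the PAL as raw arrays;
+  // params.output_multiplier/output_shift above are placeholders the PAL does not use.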
+
+ auto scratchpad = getOutputTensors()[1];
+ int8_t *scratchpad_data = nullptr;
+ if (scratchpad->is_allocatable())
+ scratchpad_data = scratchpad->data<int8_t>();
+
+ luci_interpreter_pal::DepthwiseConvPerChannel<int8_t>(
+ params, multipliers.data(), shifts.data(), getTensorShape(input()),
+ getTensorData<int8_t>(input()), getTensorShape(filter()), getTensorData<int8_t>(filter()),
+ getTensorShape(bias()), getTensorData<int32_t>(bias()), getTensorShape(output()),
+ getTensorData<int8_t>(output()), getTensorShape(scratchpad), scratchpad_data);
+}
+
+void DepthwiseConv2D::evalQuantizedS16() const
+{
+ const auto *input_data = getTensorData<int16_t>(input());
+ const auto *filter_data = getTensorData<int16_t>(filter());
+ const auto *bias_data = getTensorData<int64_t>(bias());
+ auto *output_data = getTensorData<int16_t>(output());
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t input_depth = input_shape.dim(3);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t output_height = output_shape.dim(1);
+ const int32_t output_width = output_shape.dim(2);
+
+ const int32_t stride_height = _params.stride_height;
+ const int32_t stride_width = _params.stride_width;
+ const int32_t dilation_height_factor = _params.dilation_height_factor;
+ const int32_t dilation_width_factor = _params.dilation_width_factor;
+ const int32_t depth_multiplier = _params.depth_multiplier;
+
+ const std::vector<double> effective_output_scales =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ std::vector<ChannelQuantMultipliers> quant_multipliers_raw =
+ quantizeMultipliers(effective_output_scales);
+
+ BroadcastableWrapper<ChannelQuantMultipliers> quant_multipliers(quant_multipliers_raw);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ for (int32_t batch = 0; batch < batches; ++batch)
+ {
+ for (int32_t out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int32_t out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+ {
+ for (int32_t m = 0; m < depth_multiplier; ++m)
+ {
+ const int32_t out_c = m + in_c * depth_multiplier;
+ const int32_t in_y_origin = out_y * stride_height - _padding_height;
+ const int32_t in_x_origin = out_x * stride_width - _padding_width;
+ int64_t acc = 0;
+ for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int32_t in_y = in_y_origin + dilation_height_factor * filter_y;
+ const int32_t in_x = in_x_origin + dilation_width_factor * filter_x;
+ if ((in_y >= 0 && in_y < input_height) && (in_x >= 0 && in_x < input_width))
+ {
+ const int16_t input_val =
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ const int16_t filter_val =
+ filter_data[calcOffset(filter_shape, 0, filter_y, filter_x, out_c)];
+ acc += static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
+ }
+ }
+ }
+ if (bias_data != nullptr)
+ {
+ acc += bias_data[out_c];
+ }
+
+ int32_t output_multiplier = quant_multipliers[out_c].multiplier;
+ int output_shift = quant_multipliers[out_c].shift;
+ int32_t scaled_acc =
+ tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+
+ scaled_acc = std::max(scaled_acc, activation_min);
+ scaled_acc = std::min(scaled_acc, activation_max);
+
+ output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] =
+ static_cast<int16_t>(scaled_acc);
+ }
+ }
+ }
+ }
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.h
new file mode 100644
index 000000000..3d1faf6c1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_KERNELS_DEPTHWISECONV2D_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class DepthwiseConv2D : public KernelWithParams<DepthwiseConv2DParams>
+{
+public:
+ DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
+ Tensor *scratchpad, const DepthwiseConv2DParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *filter() const { return _inputs[1]; }
+ const Tensor *bias() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void evalQuantizedPerChannel() const;
+ void evalQuantizedS8PerChannel() const;
+ void evalQuantizedS16() const;
+
+private:
+ int32_t _padding_height{};
+ int32_t _padding_width{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_DEPTHWISECONV2D_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp
new file mode 100644
index 000000000..6b4673f3e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp
@@ -0,0 +1,622 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/DepthwiseConv2D.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class DepthwiseConv2DTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(DepthwiseConv2DTest, Float)
+{
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{1, 2, 2, 4};
+ Shape bias_shape{4};
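+  // With depth_multiplier = 2 (set below), each of the 2 input channels yields 2 outputs,
+  // matching the filter's last dimension of 4.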
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(scratchpad);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 71, 0, 99, 0, //
+ 167, 0, 227, 28, //
+ };
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4}));
+}
+
+TEST_F(DepthwiseConv2DTest, Uint8)
+{
+ std::vector<float> input_data{
+ 1, 2, 7, 8, // row = 0
+ 3, 4, 9, 10, // row = 1
+ 5, 6, 11, 12, // row = 2
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+
+ std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 3, 2, 2}, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::U8>({1, 2, 2, 4}, input_quant_param.first, input_quant_param.second,
+ filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(
+ {4}, input_quant_param.first * input_quant_param.first, 0, bias_data, _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::NONE;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 71, -34, 99, -20, //
+ 91, -26, 127, -4, //
+ };
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4}));
+}
+
+TEST_F(DepthwiseConv2DTest, SInt16)
+{
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{1, 2, 2, 4};
+ Shape bias_shape{4};
+ std::vector<int32_t> ref_output_shape{1, 2, 1, 4};
+
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ std::vector<float> ref_output_data{
+ 71, 0, 99, 0, //
+ 167, 0, 227, 28, //
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, 0.25, 0, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::S16>(filter_shape, 0.2, 0, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::S64>(bias_shape, 0.25 * 0.2, 0, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+ Tensor scratchpad(DataType::S64, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(DepthwiseConv2DTest, SInt16_CWQ_weights)
+{
+ const int output_channels = 4;
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{1, 2, 2, output_channels};
+ Shape bias_shape{4};
+ std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels};
+
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ std::vector<float> ref_output_data{
+ 71, 0, 99, 0, //
+ 167, 0, 227, 28, //
+ };
+
+ float input_scale = 0.25;
+ std::vector<float> filter_scales{0.2f, 1.f, 0.5f, 0.1f};
+ std::vector<float> bias_scales;
+ for (int i = 0; i < output_channels; ++i)
+ bias_scales.push_back(filter_scales[i] * input_scale);
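+  // Channel-wise quantization: each output channel has its own filter scale, and each
+  // bias scale must equal input_scale * filter_scale[c].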
+ std::vector<int32_t> zerop(4, 0);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, _memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 3,
+ filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+ Tensor scratchpad(DataType::S16, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(DepthwiseConv2DTest, Uint8_CWQ_weights)
+{
+ const int output_channels = 4;
+ Shape input_shape{1, 3, 2, 2};
+ Shape filter_shape{1, 2, 2, output_channels};
+ Shape bias_shape{4};
+ std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels};
+
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ std::vector<float> ref_output_data{
+ 71, -34, 99, -20, //
+ 91, -26, 127, -4, //
+ };
+
+ std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(0, 16);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
+
+ std::vector<std::pair<float, int32_t>> filter_quant_params;
+ filter_quant_params.push_back(quantizationParams<uint8_t>(-9, 13));
+ filter_quant_params.push_back(quantizationParams<uint8_t>(-14, 10));
+ filter_quant_params.push_back(quantizationParams<uint8_t>(-11, 15));
+ filter_quant_params.push_back(quantizationParams<uint8_t>(-16, 12));
+
+ std::vector<float> filter_scales;
+ std::vector<int32_t> filter_zerops;
+ for (auto iter : filter_quant_params)
+ {
+ filter_scales.push_back(iter.first);
+ filter_zerops.push_back(iter.second);
+ }
+
+ std::vector<float> bias_scales;
+ for (int i = 0; i < output_channels; ++i)
+ bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
+ std::vector<int32_t> zerop(output_channels, 0);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::U8>(filter_shape, filter_scales, filter_zerops,
+ 3, filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data,
+ _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::NONE;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, output_quant_param.first));
+}
+
+TEST_F(DepthwiseConv2DTest, SInt8_CWQ_weights)
+{
+ const int output_channels = 4;
+ Shape input_shape{1, 3, 2, 2};
+ Shape filter_shape{1, 2, 2, output_channels};
+ Shape bias_shape{4};
+ std::vector<int32_t> ref_output_shape{1, 2, 1, output_channels};
+
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ std::vector<float> ref_output_data{
+ 71, -34, 99, -20, //
+ 91, -26, 127, -4, //
+ };
+
+ std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-128, 127);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
+
+ std::vector<std::pair<float, int32_t>> filter_quant_params;
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.5, 0));
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.25, 0));
+ filter_quant_params.push_back(std::pair<float, int32_t>(1, 0));
+ filter_quant_params.push_back(std::pair<float, int32_t>(0.125, 0));
+
+ std::vector<float> filter_scales;
+ std::vector<int32_t> filter_zerops;
+ for (auto iter : filter_quant_params)
+ {
+ filter_scales.push_back(iter.first);
+ filter_zerops.push_back(iter.second);
+ }
+
+ std::vector<float> bias_scales;
+ for (int i = 0; i < output_channels; ++i)
+ bias_scales.push_back(filter_quant_params[i].first * input_quant_param.first);
+ std::vector<int32_t> zerop(output_channels, 0);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::S8>(filter_shape, filter_scales, filter_zerops,
+ 3, filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_scales, zerop, 0, bias_data,
+ _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+ Tensor scratchpad(DataType::S8, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::NONE;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, output_quant_param.first));
+}
+
+TEST_F(DepthwiseConv2DTest, InvalidBiasType_NEG)
+{
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{1, 2, 2, 4};
+ Shape bias_shape{4};
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<int32_t> bias_data{1, 2, 3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DepthwiseConv2DTest, InOutTypeMismatch_NEG)
+{
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{1, 2, 2, 4};
+ Shape bias_shape{4};
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+ Tensor scratchpad(DataType::U8, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DepthwiseConv2DTest, InvalidInputShape_NEG)
+{
+ Shape input_shape{4, 2, 2};
+ Shape filter_shape{2, 2, 4};
+ Shape bias_shape{4};
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DepthwiseConv2DTest, InvalidFilterShape_NEG)
+{
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{2, 1, 2, 4};
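+  // Depthwise filters must have shape [1, H, W, O]; a leading dimension of 2 is invalid.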
+ Shape bias_shape{4};
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DepthwiseConv2DTest, InvalidBiasDim_NEG)
+{
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{1, 2, 4, 2};
+ Shape bias_shape{4};
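+  // The filter's last dimension declares 2 output channels, but the bias has 4 elements.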
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data, _memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ Tensor scratchpad(DataType::FLOAT32, Shape({}), {}, "");
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, &scratchpad,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.cpp
new file mode 100644
index 000000000..96399e5c7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Dequantize.h"
+#include "kernels/Utils.h"
+#include "PALDequantize.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Dequantize::Dequantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Dequantize::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::S8 ||
+ input()->element_type() == loco::DataType::U8 ||
+ input()->element_type() == loco::DataType::S16);
+
+ LUCI_INTERPRETER_CHECK(input()->scales().size() == 1);
+
+ if (input()->element_type() == loco::DataType::S16)
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0);
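+  // 16-bit quantization is symmetric in this interpreter, hence the zero point must be 0.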
+
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::FLOAT32);
+
+ output()->resize(input()->shape());
+}
+
+void Dequantize::execute() const
+{
+ tflite::DequantizationParams op_params;
+ op_params.zero_point = input()->zero_point();
+ op_params.scale = input()->scale();
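+  // Dequantization applies output = scale * (q - zero_point) elementwise.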
+
+ switch (input()->element_type())
+ {
+ case loco::DataType::U8:
+ {
+ luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ }
+ case loco::DataType::S8:
+ {
+ luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+ getTensorData<int8_t>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ }
+ case loco::DataType::S16:
+ {
+ luci_interpreter_pal::Dequantize(op_params, getTensorShape(input()),
+ getTensorData<int16_t>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.h b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.h
new file mode 100644
index 000000000..5565df0e4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H
+#define LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Dequantize : public Kernel
+{
+public:
+ Dequantize(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_DEQUANTIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.test.cpp
new file mode 100644
index 000000000..0cab633d6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Dequantize.test.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Dequantize.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class DequantizeTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(DequantizeTest, Uint8)
+{
+ std::vector<uint8_t> input_data{0, 1, 2, 3, 4, 251, 252, 253, 254, 255};
+
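+ // With scale 0.5 and zero point 127: (0 - 127) * 0.5 = -63.5 and (255 - 127) * 0.5 = 64.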
+ std::vector<float> ref_output_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+ Tensor input_tensor(loco::DataType::U8, {2, 5}, {{0.5}, {127}}, "");
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(uint8_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(DequantizeTest, Sint8)
+{
+ std::vector<int8_t> input_data{-128, -127, -126, -125, -124, 123, 124, 125, 126, 127};
+
+ std::vector<float> ref_output_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+ Tensor input_tensor(loco::DataType::S8, {2, 5}, {{0.5}, {-1}}, "");
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int8_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(DequantizeTest, Sint16)
+{
+ std::vector<int16_t> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131};
+
+ std::vector<float> ref_output_data{-64.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 65.5};
+
+ Tensor input_tensor(loco::DataType::S16, {2, 5}, {{0.5}, {0}}, "");
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int16_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(DequantizeTest, InvalidInputType_NEG)
+{
+ std::vector<float> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DequantizeTest, InvalidOutputType_NEG)
+{
+ std::vector<int16_t> input_data{-129, -126, -125, -124, -123, 124, 125, 126, 127, 131};
+
+ Tensor input_tensor(loco::DataType::S16, {2, 5}, {{0.5}, {0}}, "");
+
+ _memory_manager->allocate_memory(input_tensor);
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(int16_t));
+
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DequantizeTest, InvalidInputZeroPoint_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({2, 5}, 0.5, -1, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Dequantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Div.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Div.cpp
new file mode 100644
index 000000000..dd1532278
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Div.cpp
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Div.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/div.h>
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Div::Div(const Tensor *input1, const Tensor *input2, Tensor *output, const DivParams &params)
+ : KernelWithParams<DivParams>({input1, input2}, {output}, params)
+{
+}
+
+void Div::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Div::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Div::evalFloat() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<float>(params, _params.activation);
+
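+ // ProcessBroadcastShapes returns true when the shapes require broadcasting and
+ // records the broadcast category in `params` for the slow path below.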
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastDivSlow(
+ params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+ getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<float>(input1()),
+ getTensorShape(input2()), getTensorData<float>(input2()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ }
+}
+
+template <typename T> void Div::evalInteger() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<T>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastDivSlow(
+ params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+ getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+}
+
+void Div::evalQuantized() const
+{
+ const auto input1_scale = static_cast<double>(input1()->scale());
+ const auto input2_scale = static_cast<double>(input2()->scale());
+ const auto output_scale = static_cast<double>(output()->scale());
+
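+ // In real terms, out = (s1 * (q1 - z1)) / (s2 * (q2 - z2)) / s_out, so the
+ // fixed-point multiplier below approximates s1 / (s2 * s_out).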
+ const double real_output_multiplier = input1_scale / (input2_scale * output_scale);
+
+ int32_t output_multiplier{};
+ int output_shift{};
+
+ quantizeMultiplier(real_output_multiplier, &output_multiplier, &output_shift);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::ArithmeticParams params{};
+
+ params.input1_offset = -input1()->zero_point(); // Note the '-'.
+ params.input2_offset = -input2()->zero_point(); // Note the '-'.
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = output_multiplier;
+ params.output_shift = output_shift;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastDivSlow(
+ params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
+ getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
+ getTensorShape(input2()), getTensorData<uint8_t>(input2()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Div.h b/compiler/luci-micro/luci-interpreter/src/kernels/Div.h
new file mode 100644
index 000000000..c1bf3e10b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Div.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_DIV_H
+#define LUCI_INTERPRETER_KERNELS_DIV_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Div : public KernelWithParams<DivParams>
+{
+public:
+ Div(const Tensor *input1, const Tensor *input2, Tensor *output, const DivParams &params);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_DIV_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Div.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Div.test.cpp
new file mode 100644
index 000000000..85cd8b90a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Div.test.cpp
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Div.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class DivTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+float GetTolerance(float min, float max)
+{
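+ // One uint8 quantization step over [min, max]; allow an error of up to two
+ // steps plus a second-order term, since both inputs and the output are quantized.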
+ const float kQuantizedStep = (max - min) / 255.0f;
+ const float kQuantizedTolerance = 2.0f * kQuantizedStep + kQuantizedStep * kQuantizedStep;
+ return kQuantizedTolerance;
+}
+
+TEST_F(DivTest, Float)
+{
+ Shape base_shape = {2, 3, 1, 1};
+
+ std::vector<int32_t> output_shape = {2, 3, 1, 1};
+
+ std::vector<float> input1_data{0.3f, 2.3f, 0.9f, 0.5f, 0.8f, 1.1f};
+ std::vector<float> input2_data{0.2f, 1.6f, 0.5f, 0.4f, 1.6f, 0.4f};
+ std::vector<float> test_outputs{1.5f, 1.4375f, 1.8f, 1.25f, 0.5f, 2.75f};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input2_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST_F(DivTest, FloatBroadcast)
+{
+ Shape input1_shape = {1, 3};
+ Shape input2_shape = {3, 1};
+
+ std::vector<float> input1_data{-0.3f, 2.3f, 0.9f};
+ std::vector<float> input2_data{0.2f, 1.6f, 0.5f};
+ std::vector<float> test_outputs{0.f, 11.5f, 4.5f, 0.f, 1.4375f, 0.5625f, 0.f, 4.6f, 1.8f};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f));
+}
+
+TEST_F(DivTest, Uint8)
+{
+ Shape base_shape = {1, 2, 2, 1};
+
+ std::vector<int32_t> output_shape = {1, 2, 2, 1};
+
+ std::vector<float> input1_data = {-0.8f, -0.2f, 0.3f, 0.7f};
+ std::vector<float> input2_data = {-0.8f, 0.4f, 0.8f, 1.0f};
+ std::vector<float> test_outputs{1.0f, 0.f, 0.375f, 0.7f};
+
+ const float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.f, 1.f);
+
+ Tensor input1_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, input1_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, input2_data, _memory_manager.get());
+
+ Tensor output_tensor =
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(test_outputs, kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <loco::DataType DType> void checkInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+
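+ // Integer division truncates toward zero, and the RELU activation clamps
+ // negative quotients to zero, which accounts for the zeros below.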
+ std::vector<std::vector<dtype>> test_outputs = {{5, 6, 2, 0, 10, 3, //
+ 10, 0, 4, 5, 20, 0, //
+ 0, 0, 0, 2, 0, 0, //
+ 2, 0, 1, 10, 5, 0, //
+ 2, 3, 1, 0, 5, 1, //
+ 18, 20, 7, 0, 37, 10},
+ {5, 6, 4, 5, 0, 0, 2, 0, 1, 0, 37, 10},
+ {5, 7, 4, 6, 2, 3, 10, 0, 8, 0, 4, 0,
+ 0, 0, 0, 0, 0, 0, 0, 10, 5, 0, 1, 0,
+ 0, 0, 5, 9, 1, 1, 0, 0, 37, 50, 7, 10},
+ {5, 7, 8, 0, 0, 0, 0, 10, 5, 9, 7, 10}};
+ std::vector<dtype> input1_data{20, 30, 40, -17, -4, -7, 11, -31, 10, 19, 75, 100};
+ std::vector<dtype> input2_data{4, 5, 10, -3, 2, 10};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+}
+
+TEST_F(DivTest, SInt64)
+{
+ checkInteger<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(DivTest, SInt32)
+{
+ checkInteger<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(DivTest, Input_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(DivTest, Invalid_Input_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U64);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST_F(DivTest, Invalid_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ DivParams params{};
+ params.activation = Activation::RELU;
+
+ Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Elu.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.cpp
new file mode 100644
index 000000000..697d63be4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Elu.h"
+#include "kernels/Utils.h"
+
+#include "PALElu.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Elu::Elu(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Elu::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ output()->resize(input()->shape());
+}
+
+void Elu::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ luci_interpreter_pal::Elu(getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Elu.h b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.h
new file mode 100644
index 000000000..c844ab57f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_ELU_H
+#define LUCI_INTERPRETER_KERNELS_ELU_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Elu : public Kernel
+{
+public:
+ Elu(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_ELU_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Elu.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.test.cpp
new file mode 100644
index 000000000..814499cdb
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Elu.test.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Elu.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Elu kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(EluTest, SimpleElu)
+{
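+ // ELU: f(x) = x for x > 0 and f(x) = exp(x) - 1 for x <= 0 (alpha = 1),
+ // e.g. f(-2) = exp(-2) - 1 ~= -0.864665.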
+ Check(
+ /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+ /*input_data=*/
+ {
+ 0, -6, 2, -4, //
+ 3, -2, 10, -0.1, //
+ },
+ /*output_data=*/
+ {
+ 0.0, -0.997521, 2.0, -0.981684, //
+ 3.0, -0.864665, 10.0, -0.0951626, //
+ });
+}
+
+TEST(EluTest, InOutTypeMismatch_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, -6, 2, -4, //
+ 3, -2, 10, -0.1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Elu kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Equal.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.cpp
new file mode 100644
index 000000000..a57e127b7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Equal.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Equal::Equal(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+void Equal::configure()
+{
+ LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
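+ // Quantized inputs are compared on rescaled values, so each input scale is
+ // folded into a fixed-point multiplier/shift pair at configure time.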
+ if (x()->element_type() == DataType::U8)
+ {
+ quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+ quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+ }
+ output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void Equal::execute() const
+{
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Equal::evalFloat() const
+{
+ const auto x_data = getTensorData<float>(x());
+ const auto y_data = getTensorData<float>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowEqual(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::Equal(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+ y_data, getTensorShape(output()), output_data);
+ }
+}
+
+template <typename T> void Equal::evalInteger() const
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::EqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
+void Equal::evalQuantized() const
+{
+ const auto x_data = getTensorData<uint8_t>(x());
+ const auto y_data = getTensorData<uint8_t>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
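+ // Both inputs are left-shifted by 8 bits before rescaling so the fixed-point
+ // comparison keeps enough precision across the two input scales.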
+ op_params.left_shift = 8;
+ op_params.input1_offset = -x()->zero_point(); // Note the '-'
+ op_params.input1_shift = _x_shift;
+ op_params.input1_multiplier = _x_multiplier;
+ op_params.input2_offset = -y()->zero_point(); // Note the '-'
+ op_params.input2_shift = _y_shift;
+ op_params.input2_multiplier = _y_multiplier;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowEqualWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::EqualWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Equal.h b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.h
new file mode 100644
index 000000000..c9be32cc0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_EQUAL_H
+#define LUCI_INTERPRETER_KERNELS_EQUAL_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Equal : public Kernel
+{
+public:
+ Equal(const Tensor *x, const Tensor *y, Tensor *output);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+
+private:
+ int32_t _x_multiplier = 0;
+ int _x_shift = 0;
+ int32_t _y_multiplier = 0;
+ int _y_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_EQUAL_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Equal.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.test.cpp
new file mode 100644
index 000000000..5870e5460
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Equal.test.cpp
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Equal.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class EqualTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(EqualTest, FloatSimple)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, false, // Row 1
+ false, true, false, // Row 2
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(EqualTest, FloatBroadcast)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
+ 0.9, 0.7, 0.5, // Row 4
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, false, // Row 1
+ false, false, false, // Row 2
+ false, false, false, // Row 3
+ true, true, true, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value, -2, max_value};
+
+ std::vector<bool> ref_output_data{true, false, true};
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -2, -3, // Row 3
+ min_value, -2, max_value, // Row 4
+ };
+
+ std::vector<dtype> y_data{
+ min_value, -2, max_value, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, false, // Row 1
+ false, false, true, // Row 2
+ false, true, false, // Row 3
+ true, true, true, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(EqualTest, Int32)
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(EqualTest, Int64)
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+const float F_MIN = -128.0 / 128.0;
+const float F_MAX = 127.0 / 128.0;
+
+TEST_F(EqualTest, Uint8Quantized)
+{
+ std::vector<float> x_data{
+ 0.5, 0.5, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.5, 0.55, 0.5, // Row 1
+ -1, 0, 0.05, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, false, false, // Row 1
+ false, true, true, false, // Row 2
+ };
+
+ std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+
+ std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 2);
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(EqualTest, Uint8QuantizedBroadcast)
+{
+ std::vector<float> x_data{
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
+ -1, 0.05, 0, 1, // Row 4
+ };
+
+ std::vector<float> y_data{
+ -1, 0.05, 0, 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, false, false, false, // Row 1
+ false, false, true, false, // Row 2
+ false, false, false, false, // Row 3
+ true, true, true, true, // Row 4
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 4, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(EqualTest, Input_Type_Mismatch_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(EqualTest, Input_Output_Type_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(EqualTest, Float_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(EqualTest, Int32_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(EqualTest, Int64_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Equal kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Exp.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.cpp
new file mode 100644
index 000000000..e7c560a88
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Exp.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/exp.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Exp::Exp(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Exp::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ output()->resize(input()->shape());
+}
+
+void Exp::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Exp::evalFloat() const
+{
+ const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+ tflite::reference_ops::Exp(getTensorData<float>(input()), size, getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Exp.h b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.h
new file mode 100644
index 000000000..429177375
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_EXP_H
+#define LUCI_INTERPRETER_KERNELS_EXP_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Exp : public Kernel
+{
+public:
+ Exp(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_EXP_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Exp.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.test.cpp
new file mode 100644
index 000000000..a159d9db9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Exp.test.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Exp.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(ExpTest, Float)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Shape input_shape{1, 1, 7};
+ std::vector<float> input_data{0.0f, 1.0f, -1.0f, 100.0f, -100.0f, 0.01f, -0.01f};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Exp kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<int32_t> ref_output_shape{1, 1, 7};
+ std::vector<float> ref_output_data{std::exp(0.0f), std::exp(1.0f), std::exp(-1.0f),
+ std::exp(100.0f), std::exp(-100.0f), std::exp(0.01f),
+ std::exp(-0.01f)};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.cpp
new file mode 100644
index 000000000..ba35c99fa
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ExpandDims.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ExpandDims::ExpandDims(const Tensor *input, const Tensor *axis, Tensor *output)
+ : Kernel({input, axis}, {output})
+{
+}
+
+void ExpandDims::configure()
+{
+ int32_t axis_value;
+
+ switch (axis()->element_type())
+ {
+ case loco::DataType::S32:
+ axis_value = *getTensorData<int32_t>(axis());
+ break;
+ case loco::DataType::S64:
+ axis_value = static_cast<int32_t>(*getTensorData<int64_t>(axis()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+
+ const auto input_shape = input()->shape();
+
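+ // A negative axis counts from the back of the output shape; e.g. axis -1 on a
+ // rank-2 input places the new dimension last, producing a rank-3 output.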
+ if (axis_value < 0)
+ {
+ axis_value += input_shape.num_dims() + 1;
+ }
+
+ LUCI_INTERPRETER_CHECK(axis_value <= input_shape.num_dims() and axis_value >= 0);
+
+ Shape output_shape(input_shape.num_dims() + 1);
+ for (int32_t i = 0; i < output_shape.num_dims(); ++i)
+ {
+ if (i < axis_value)
+ {
+ output_shape.dim(i) = input_shape.dim(i);
+ }
+ else if (i == axis_value)
+ {
+ output_shape.dim(i) = 1;
+ }
+ else
+ {
+ LUCI_INTERPRETER_CHECK(i >= 1);
+ output_shape.dim(i) = input_shape.dim(i - 1);
+ }
+ }
+
+ output()->resize(output_shape);
+}
+
+void ExpandDims::execute() const
+{
+ // Just copy input to output
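+ // - inserting a size-1 dimension changes only the shape metadata, never the
+ // element layout, so a flat copy is sufficient.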
+ const auto *input_data = input()->data<void>();
+ auto *output_data = output()->data<void>();
+
+ const size_t element_size = getDataTypeSize(input()->element_type());
+ const int32_t num_elements = input()->shape().num_elements();
+ std::memcpy(output_data, input_data, num_elements * element_size);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.h b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.h
new file mode 100644
index 000000000..e510b1160
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
+#define LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ExpandDims : public Kernel
+{
+public:
+ ExpandDims(const Tensor *input, const Tensor *axis, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *axis() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_EXPAND_DIMS_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp
new file mode 100644
index 000000000..df9eaccc0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ExpandDims.test.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ExpandDims.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ExpandDimsTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(ExpandDimsTest, PositiveAxis)
+{
+ std::vector<int32_t> input_data{-1, 1, -2, 2};
+ std::initializer_list<int32_t> input_shape = {2, 2};
+
+ std::initializer_list<int32_t> axis_value = {0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(input_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2}));
+}
+
+TEST_F(ExpandDimsTest, NegAxis)
+{
+ std::vector<int32_t> input_data{-1, 1, -2, 2};
+ std::initializer_list<int32_t> input_shape = {2, 2};
+
+ std::initializer_list<int32_t> axis_value = {-1};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(input_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 2, 1}));
+}
+
+TEST_F(ExpandDimsTest, InvalidAxisType_NEG)
+{
+ std::vector<int32_t> input_data{-1, 1, -2, 2};
+ std::initializer_list<int32_t> input_shape = {2, 2};
+
+ std::initializer_list<float> axis_value = {1.0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::FLOAT32>({1}, axis_value, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ExpandDimsTest, InvalidAxisValue_NEG)
+{
+ std::vector<int32_t> input_data{-1, 1, -2, 2};
+ std::initializer_list<int32_t> input_shape = {2, 2};
+
+ std::initializer_list<int32_t> axis_value = {3};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_value, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ ExpandDims kernel(&input_tensor, &axis_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Fill.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.cpp
new file mode 100644
index 000000000..e09d6331a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.cpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Fill.h"
+#include "kernels/Utils.h"
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Fill::Fill(const Tensor *dims, const Tensor *value, Tensor *output)
+ : Kernel({dims, value}, {output})
+{
+}
+
+template <typename T> void Fill::configureShape()
+{
+ const auto dims_data = getTensorData<T>(dims());
+ Shape output_shape(dims()->shape().dim(0));
+
+ for (int i = 0; i < output_shape.num_dims(); ++i)
+ {
+ T data = dims_data[i];
+ if (data < 0)
+ throw std::runtime_error("Fill dimensions must be >= 0");
+
+ output_shape.dim(i) = data;
+ }
+
+ output()->resize(output_shape);
+}
+
+void Fill::configure()
+{
+ const auto dims_shape = dims()->shape();
+ const auto value_shape = value()->shape();
+
+ // Make sure the 1st input tensor is 1-D
+ LUCI_INTERPRETER_CHECK(dims_shape.num_dims() == 1);
+
+ // Make sure the 1st input tensor is int32 or int64
+ LUCI_INTERPRETER_CHECK(dims()->element_type() == DataType::S32 or
+ dims()->element_type() == DataType::S64);
+
+ // Make sure the 2nd input tensor is a scalar
+ LUCI_INTERPRETER_CHECK(value_shape.num_dims() == 0);
+
+ // Check zero point and scale for S16 and S8
+ if (value()->element_type() == loco::DataType::S16 or
+ value()->element_type() == loco::DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(value()->scale() == output()->scale());
+ LUCI_INTERPRETER_CHECK(value()->zero_point() == output()->zero_point());
+
+ if (value()->element_type() == loco::DataType::S16)
+ LUCI_INTERPRETER_CHECK(value()->zero_point() == 0);
+ }
+ // Resize output
+ switch (dims()->element_type())
+ {
+ case DataType::S32:
+ configureShape<int32_t>();
+ break;
+ case DataType::S64:
+ configureShape<int64_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Fill::execute() const
+{
+ switch (output()->element_type())
+ {
+ case DataType::S8:
+ tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int8_t>(value()),
+ getTensorShape(output()), getTensorData<int8_t>(output()));
+ break;
+ case DataType::S16:
+ tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int16_t>(value()),
+ getTensorShape(output()), getTensorData<int16_t>(output()));
+ break;
+ case DataType::S32:
+ tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int32_t>(value()),
+ getTensorShape(output()), getTensorData<int32_t>(output()));
+ break;
+ case DataType::S64:
+ tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<int64_t>(value()),
+ getTensorShape(output()), getTensorData<int64_t>(output()));
+ break;
+ case DataType::FLOAT32:
+ tflite::reference_ops::Fill(getTensorShape(value()), getTensorData<float>(value()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
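As a worked example of configureShape above (the helper below is illustrative, using plain vectors in place of tensors, and is not part of the kernel): a 1-D dims tensor holding {2, 3} produces a {2, 3} output, i.e. six copies of the scalar value, and any negative entry throws:

#include <cstdint>
#include <stdexcept>
#include <vector>

// Illustrative stand-in for Fill::configureShape: copy each dims entry
// into the output shape, rejecting negatives as the kernel does.
std::vector<int32_t> fill_output_shape(const std::vector<int32_t> &dims_data)
{
  std::vector<int32_t> shape;
  for (int32_t d : dims_data)
  {
    if (d < 0)
      throw std::runtime_error("Fill dimensions must be >= 0");
    shape.push_back(d);
  }
  return shape; // {2, 3} -> 2 * 3 = 6 elements filled with the value
}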
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Fill.h b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.h
new file mode 100644
index 000000000..184f0cb83
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_FILL_H
+#define LUCI_INTERPRETER_KERNELS_FILL_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Fill : public Kernel
+{
+public:
+ Fill(const Tensor *dims, const Tensor *value, Tensor *output);
+
+ const Tensor *dims() const { return _inputs[0]; }
+ const Tensor *value() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void configureShape();
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_FILL_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Fill.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.test.cpp
new file mode 100644
index 000000000..cf56df507
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Fill.test.cpp
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Fill.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class FillTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+template <typename T, DataType DT> void runFillIntKernel(IMemoryManager *memory_manager)
+{
+ Shape dims_shape{2};
+
+ std::vector<int32_t> dims_data = {2, 3};
+ std::vector<T> value_data = {5};
+
+ Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, memory_manager);
+ Tensor value = makeInputTensor<DT>(/*scalar*/ {}, value_data, memory_manager);
+
+ Tensor output_tensor = makeOutputTensor(DT);
+
+ Fill kernel(&dims, &value, &output_tensor);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<T> ref_output_data{5, 5, 5, 5, 5, 5};
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ref_output_data);
+
+ std::vector<int32_t> ref_output_shape{2, 3};
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+template <DataType DT> void runFillQuantIntKernel(IMemoryManager *memory_manager)
+{
+ Shape dims_shape{2};
+
+ std::vector<int32_t> dims_data = {2, 3};
+ std::vector<float> value_data = {5};
+
+ int32_t zero_point = 0;
+
+ if (DT == loco::DataType::S8)
+ zero_point = 1;
+
+ Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, memory_manager);
+ Tensor value = makeInputTensor<DT>(/*scalar*/ {}, /*scale*/ 0.25, /*zero_point*/ zero_point,
+ value_data, memory_manager);
+
+ Tensor output_tensor = makeOutputTensor(DT, /*scale*/ 0.25, /*zero_point*/ zero_point);
+
+ Fill kernel(&dims, &value, &output_tensor);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{5, 5, 5, 5, 5, 5};
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+
+ std::vector<int32_t> ref_output_shape{2, 3};
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FillTest, FillInt)
+{
+ // Run for int32_t input
+ runFillIntKernel<int32_t, loco::DataType::S32>(_memory_manager.get());
+ // Run for int64_t input
+ runFillIntKernel<int64_t, loco::DataType::S64>(_memory_manager.get());
+ // Run for int8_t input
+ runFillQuantIntKernel<loco::DataType::S8>(_memory_manager.get());
+ // Run for int16_t input
+ runFillQuantIntKernel<loco::DataType::S16>(_memory_manager.get());
+
+ SUCCEED();
+}
+
+TEST_F(FillTest, FillFloat)
+{
+ Shape dims_shape{3};
+
+ std::vector<int64_t> dims_data = {2, 2, 2};
+ std::vector<float> value_data = {5};
+
+ Tensor dims = makeInputTensor<loco::DataType::S64>(dims_shape, dims_data, _memory_manager.get());
+ Tensor value =
+ makeInputTensor<loco::DataType::FLOAT32>(/*scalar*/ {}, value_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32);
+
+ Fill kernel(&dims, &value, &output_tensor);
+
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{5, 5, 5, 5, 5, 5, 5, 5};
+
+ std::vector<int32_t> ref_output_shape{2, 2, 2};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), ref_output_data);
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FillTest, Invalid_Input_Shape_NEG)
+{
+ Shape dims_shape{1, 3};
+
+ std::vector<int32_t> dims_data = {2, 2, 2};
+ std::vector<float> value_data = {5};
+
+ Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, _memory_manager.get());
+ Tensor value =
+ makeInputTensor<loco::DataType::FLOAT32>(/*scalar*/ {}, value_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32);
+
+ Fill kernel(&dims, &value, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(FillTest, Invalid_Value_Shape_NEG)
+{
+ Shape dims_shape{3};
+
+ std::vector<int32_t> dims_data = {2, 2, 2};
+ std::vector<float> value_data = {5};
+
+ Tensor dims = makeInputTensor<loco::DataType::S32>(dims_shape, dims_data, _memory_manager.get());
+ Tensor value = makeInputTensor<loco::DataType::FLOAT32>({1}, value_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32);
+
+ Fill kernel(&dims, &value, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
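The quantized cases above round-trip the value through scale 0.25 and, for S8, zero point 1. A hand check of that arithmetic (standalone sketch, not test code):

#include <cassert>
#include <cmath>
#include <cstdint>

// quantize(5.0) = round(5.0 / 0.25) + 1 = 21, and dequantize(21) =
// 0.25 * (21 - 1) = 5.0, so FloatArrayNear sees the exact value back.
int main()
{
  const float scale = 0.25f;
  const int32_t zero_point = 1;
  const int32_t q = static_cast<int32_t>(std::lround(5.0f / scale)) + zero_point;
  assert(q == 21);
  assert(scale * static_cast<float>(q - zero_point) == 5.0f);
  return 0;
}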
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Floor.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.cpp
new file mode 100644
index 000000000..e3c4246cc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Floor.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/floor.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Floor::Floor(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Floor::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ output()->resize(input()->shape());
+}
+
+void Floor::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Floor::evalFloat() const
+{
+ tflite::reference_ops::Floor(getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Floor.h b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.h
new file mode 100644
index 000000000..ca3ad5997
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_FLOOR_H
+#define LUCI_INTERPRETER_KERNELS_FLOOR_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Floor : public Kernel
+{
+public:
+ Floor(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_FLOOR_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Floor.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.test.cpp
new file mode 100644
index 000000000..30076fb54
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Floor.test.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Floor.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class FloorTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(FloorTest, SimpleFloat)
+{
+ std::initializer_list<int32_t> input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0.2, 8.6, 2.4, 4.3, // Row 1
+ 3, 7.1, 10.5, -0.9, // Row 2
+ };
+
+ std::initializer_list<int32_t> ref_output_shape{1, 2, 4, 1};
+ std::vector<float> ref_output_data{
+ 0, 8, 2, 4, // Row 1
+ 3, 7, 10, -1, // Row 2
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Floor kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FloorTest, Input_Output_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ Floor kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.cpp
new file mode 100644
index 000000000..a7a10a336
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/FloorDiv.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/binary_function.h>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+FloorDiv::FloorDiv(const Tensor *x, const Tensor *y, Tensor *output)
+ : Kernel({x, y}, {output})
+{
+}
+
+void FloorDiv::configure()
+{
+ LUCI_INTERPRETER_CHECK(x()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(y()->element_type() == output()->element_type());
+
+ output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void FloorDiv::execute() const
+{
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void FloorDiv::evalFloat() const
+{
+ auto FloorDivFunc = [](float x, float y) -> float {
+ return std::floor(static_cast<double>(x) / static_cast<double>(y));
+ };
+
+ const auto x_data = getTensorData<float>(x());
+ const auto y_data = getTensorData<float>(y());
+
+ // Reject zeros anywhere in the denominator before dividing
+ for (int i = 0; i < getTensorShape(y()).FlatSize(); ++i)
+ {
+ LUCI_INTERPRETER_CHECK(y_data[i] != 0);
+ }
+
+ if (x()->shape() != y()->shape())
+ {
+ tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>(
+ getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ getTensorData<float>(output()), FloorDivFunc);
+ }
+ else
+ {
+ tflite::reference_ops::BinaryFunction<float, float, float>(
+ getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ getTensorData<float>(output()), FloorDivFunc);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
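A few worked values for the FloorDivFunc lambda above, chosen to match the tests that follow; flooring rounds toward negative infinity, which is what makes the negative cases differ from plain truncation:

#include <cassert>
#include <cmath>

int main()
{
  auto floor_div = [](float x, float y) -> float {
    return std::floor(static_cast<double>(x) / static_cast<double>(y));
  };
  assert(floor_div(2.4f, 0.5f) == 4.0f);   // 4.8 floors to 4
  assert(floor_div(-1.9f, -1.0f) == 1.0f); // 1.9 floors to 1
  assert(floor_div(-3.1f, 0.9f) == -4.0f); // -3.44 floors to -4, not -3
  return 0;
}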
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.h b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.h
new file mode 100644
index 000000000..e9c47d81a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H
+#define LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class FloorDiv : public Kernel
+{
+public:
+ FloorDiv(const Tensor *x, const Tensor *y, Tensor *output);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.test.cpp
new file mode 100644
index 000000000..3e1b5f18e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/FloorDiv.test.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/FloorDiv.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class FloorDivTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(FloorDivTest, FloatSimple)
+{
+ Shape x_shape{2, 3};
+ std::vector<float> x_data{
+ 0.5, 2.4, 3.1, // Row 1
+ 1.9, -1.9, -2.8, // Row 2
+ };
+
+ Shape y_shape = x_shape;
+ std::vector<float> y_data{
+ 2.0, 0.5, 3.0, // Row 1
+ 1.0, -1.0, -2.0, // Row 2
+ };
+
+ std::vector<int32_t> ref_output_shape{2, 3};
+ std::vector<float> ref_output_data{
+ 0, 4, 1, // Row 1
+ 1, 1, 1, // Row 2
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FloorDivTest, FloatBroadcast)
+{
+ Shape x_shape{1, 3};
+ std::vector<float> x_data{
+ 0.5, 2.4, -3.1, // Row 1
+ };
+
+ Shape y_shape{3, 3};
+ std::vector<float> y_data{
+ 1.0, 1.0, 1.0, // Row 1
+ 2.0, -0.5, -2.0, // Row 2
+ 0.3, 0.7, 0.9, // Row 3
+ };
+
+ std::vector<int32_t> ref_output_shape{3, 3};
+ std::vector<float> ref_output_data{
+ 0, 2, -4, // Row 1
+ 0, -5, 1, // Row 2
+ 1, 3, -4, // Row 3
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(FloorDivTest, DivByZero_NEG)
+{
+ Shape shape{3};
+ std::vector<float> x_data{1, 0, -1};
+ std::vector<float> y_data{0, 0, 0};
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(shape, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(shape, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST_F(FloorDivTest, Input_Output_Type_Mismatch_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(FloorDivTest, Input_Type_Mismatch_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.cpp
new file mode 100644
index 000000000..bd2bb2f35
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.cpp
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/FullyConnected.h"
+
+#include "kernels/Utils.h"
+
+#include "PALFullyConnected.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+FullyConnected::FullyConnected(const Tensor *input, const Tensor *weights, const Tensor *bias,
+ Tensor *output, const FullyConnectedParams &params)
+ : KernelWithParams<FullyConnectedParams>({input, weights, bias}, {output}, params)
+{
+}
+
+void FullyConnected::configure()
+{
+ if (weights()->element_type() == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::U8);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::U8);
+ LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::S32);
+ }
+ else if (weights()->element_type() == DataType::FLOAT32)
+ {
+ LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::FLOAT32);
+ }
+ else if (weights()->element_type() == DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S8);
+ LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::S32);
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported type.");
+ }
+
+ const Shape &input_shape = input()->shape();
+ const Shape &weights_shape = weights()->shape();
+
+ LUCI_INTERPRETER_CHECK(weights_shape.num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(bias() == nullptr ||
+ bias()->shape().num_elements() == weights_shape.dim(0));
+
+ LUCI_INTERPRETER_CHECK(input_shape.num_elements() % weights_shape.dim(1) == 0);
+ const int32_t batch_size = input_shape.num_elements() / weights_shape.dim(1);
+ const int32_t num_units = weights_shape.dim(0);
+
+ if (bias())
+ LUCI_INTERPRETER_CHECK(bias()->shape().num_elements() == weights()->shape().dim(0));
+
+ if (params().keep_num_dims == false)
+ {
+ output()->resize({batch_size, num_units});
+ }
+ else
+ {
+ luci_interpreter::Shape output_shape(input_shape.num_dims());
+ for (int i = 0; i < input_shape.num_dims(); ++i)
+ output_shape.dim(i) = input_shape.dim(i);
+ output_shape.dim(input_shape.num_dims() - 1) = num_units;
+ output()->resize(output_shape);
+ }
+}
+
+void FullyConnected::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S8:
+ evalQuantizedS8();
+ break;
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void FullyConnected::evalFloat() const
+{
+ float activation_min{};
+ float activation_max{};
+ calculateActivationRange(_params.activation, &activation_min, &activation_max);
+
+ tflite::FullyConnectedParams params{};
+ params.float_activation_min = activation_min;
+ params.float_activation_max = activation_max;
+ params.weights_format = tflite::FullyConnectedWeightsFormat::kDefault;
+
+ tflite::reference_ops::FullyConnected(
+ params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(weights()),
+ getTensorData<float>(weights()), getTensorShape(bias()), getTensorData<float>(bias()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void FullyConnected::evalQuantized() const
+{
+ double real_multiplier = 0.0;
+ int output_shift;
+ int32_t output_activation_min;
+ int32_t output_activation_max;
+ int32_t output_multiplier;
+ real_multiplier =
+ getQuantizedConvolutionMultipler(input()->scale(), weights()->scale(), output()->scale());
+ quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+ calculateActivationRangeQuantized(params().activation, output(), &output_activation_min,
+ &output_activation_max);
+
+ int32_t input_offset = -input()->zero_point();
+ int32_t filter_offset = -weights()->zero_point();
+ int32_t output_offset = output()->zero_point();
+
+ tflite::FullyConnectedParams op_params{};
+ op_params.input_offset = input_offset;
+ op_params.weights_offset = filter_offset;
+ op_params.output_offset = output_offset;
+ op_params.output_multiplier = output_multiplier;
+ op_params.output_shift = output_shift;
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+ op_params.lhs_cacheable = false;
+ op_params.rhs_cacheable = false;
+ tflite::reference_ops::FullyConnected(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(weights()),
+ getTensorData<uint8_t>(weights()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+void FullyConnected::evalQuantizedS8() const
+{
+ double real_multiplier = 0.0;
+ int output_shift;
+ int32_t output_activation_min;
+ int32_t output_activation_max;
+ int32_t output_multiplier;
+ real_multiplier =
+ getQuantizedConvolutionMultipler(input()->scale(), weights()->scale(), output()->scale());
+ quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+ calculateActivationRangeQuantized(params().activation, output(), &output_activation_min,
+ &output_activation_max);
+
+ int32_t input_offset = -input()->zero_point();
+ int32_t filter_offset = -weights()->zero_point();
+ int32_t output_offset = output()->zero_point();
+
+ tflite::FullyConnectedParams op_params{};
+ op_params.input_offset = input_offset;
+ op_params.weights_offset = filter_offset;
+ op_params.output_offset = output_offset;
+ op_params.output_multiplier = output_multiplier;
+ op_params.output_shift = output_shift;
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+ op_params.lhs_cacheable = false;
+ op_params.rhs_cacheable = false;
+ luci_interpreter_pal::FullyConnected<int8_t>(
+ op_params, getTensorShape(input()), getTensorData<int8_t>(input()), getTensorShape(weights()),
+ getTensorData<int8_t>(weights()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+ getTensorShape(output()), getTensorData<int8_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
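A sketch of the rescaling step both quantized paths above rely on. It assumes only that getQuantizedConvolutionMultipler returns (input_scale * weights_scale) / output_scale and that quantizeMultiplier splits that into a Q31 fixed-point multiplier plus a power-of-two shift; this standalone model is illustrative, not the helper from kernels/Utils:

#include <cassert>
#include <cmath>
#include <cstdint>

// Decompose a real multiplier in (0, 1) as q * 2^shift with q in [0.5, 1),
// then store q in Q31. The real helper also handles q rounding up to 1.
void decompose_multiplier(double real_multiplier, int32_t *quantized, int *shift)
{
  assert(real_multiplier > 0.0 && real_multiplier < 1.0);
  const double q = std::frexp(real_multiplier, shift);
  *quantized = static_cast<int32_t>(std::round(q * (1ll << 31)));
}

int main()
{
  // e.g. input_scale = 0.5, weights_scale = 0.5, output_scale = 1.0
  int32_t quantized = 0;
  int shift = 0;
  decompose_multiplier((0.5 * 0.5) / 1.0, &quantized, &shift);
  assert(shift == -1);            // 0.25 = 0.5 * 2^-1
  assert(quantized == (1 << 30)); // 0.5 in Q31
  return 0;
}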
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.h b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.h
new file mode 100644
index 000000000..2a7c068c0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_KERNELS_FULLYCONNECTED_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class FullyConnected : public KernelWithParams<FullyConnectedParams>
+{
+public:
+ FullyConnected(const Tensor *input, const Tensor *weights, const Tensor *bias, Tensor *output,
+ const FullyConnectedParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *weights() const { return _inputs[1]; }
+ const Tensor *bias() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void evalQuantizedS8() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_FULLYCONNECTED_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp
new file mode 100644
index 000000000..4474cc4fb
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/FullyConnected.test.cpp
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/FullyConnected.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape,
+ std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
+ std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor weights_tensor =
+ makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ FullyConnectedParams params{};
+ params.activation = Activation::RELU;
+
+ FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
+}
+
+template <>
+void Check<int8_t>(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> weights_shape,
+ std::initializer_list<int32_t> bias_shape,
+ std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data,
+ std::initializer_list<float> weights_data,
+ std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ const float quantized_tolerance = getTolerance(-127, 128, 255);
+ std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-63.5, 64);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-127, 128);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, memory_manager.get());
+ Tensor weights_tensor =
+ makeInputTensor<DataType::S8>(weights_shape, input_quant_param.first, input_quant_param.second,
+ weights_data, memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::S32>(bias_shape, input_quant_param.first * input_quant_param.first, 0,
+ bias_data, memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+
+ FullyConnectedParams params{};
+ params.activation = Activation::RELU;
+
+ FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, quantized_tolerance));
+}
+
+template <>
+void Check<uint8_t>(
+ std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape,
+ std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> weights_data,
+ std::initializer_list<float> bias_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ const float quantized_tolerance = getTolerance(-127, 128, 255);
+ std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, memory_manager.get());
+ Tensor weights_tensor =
+ makeInputTensor<DataType::U8>(weights_shape, input_quant_param.first, input_quant_param.second,
+ weights_data, memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::S32>(bias_shape, input_quant_param.first * input_quant_param.first, 0,
+ bias_data, memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+
+ FullyConnectedParams params{};
+ params.activation = Activation::RELU;
+
+ FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, quantized_tolerance));
+}
+
+template <typename T> class FullyConnectedTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
+TYPED_TEST_SUITE(FullyConnectedTest, DataTypes);
+
+TYPED_TEST(FullyConnectedTest, Simple)
+{
+ Check<TypeParam>({3, 2, 2, 1}, {3, 6}, {3}, {2, 3},
+ {
+ -3, -5, 5, 4, 9, -2, // batch = 0
+ -3, -2, -4, 9, -8, 1, // batch = 1
+ },
+ {
+ -3, -7, 4, -4, -6, 4, // unit = 0
+ 3, 5, 2, 3, -3, -8, // unit = 1
+ -3, 7, 4, 9, 0, -5, // unit = 2
+ },
+ {-1, -5, -8},
+ {
+ 0, 0, 32, // batch = 0
+ 22, 11, 47, // batch = 1
+ });
+}
+
+TEST(FullyConnectedTest, InvalidBiasType_NEG)
+{
+ Shape input_shape{3, 2, 2, 1};
+ std::vector<float> input_data{
+ -3, -5, 5, 4, 9, -2, // batch = 0
+ -3, -2, -4, 9, -8, 1, // batch = 1
+ };
+ Shape weights_shape{3, 6};
+ std::vector<float> weights_data{
+ -3, -7, 4, -4, -6, 4, // unit = 0
+ 3, 5, 2, 3, -3, -8, // unit = 1
+ -3, 7, 4, 9, 0, -5, // unit = 2
+ };
+ Shape bias_shape{3};
+ std::vector<int32_t> bias_data{-1, -5, -8};
+
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor weights_tensor =
+ makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ FullyConnectedParams params{};
+ params.activation = Activation::RELU;
+
+ FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(FullyConnectedTest, InvalidWeightShapeDim_NEG)
+{
+ Shape input_shape{3, 2, 2, 1};
+ std::vector<float> input_data{
+ -3, -5, 5, 4, 9, -2, // batch = 0
+ -3, -2, -4, 9, -8, 1, // batch = 1
+ };
+ Shape weights_shape{1, 3, 6};
+ std::vector<float> weights_data{
+ -3, -7, 4, -4, -6, 4, // unit = 0
+ 3, 5, 2, 3, -3, -8, // unit = 1
+ -3, 7, 4, 9, 0, -5, // unit = 2
+ };
+ Shape bias_shape{3};
+ std::vector<float> bias_data{-1, -5, -8};
+
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor weights_tensor =
+ makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ FullyConnectedParams params{};
+ params.activation = Activation::RELU;
+
+ FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(FullyConnectedTest, BiasElementNumWeightDimMismatch_NEG)
+{
+ Shape input_shape{3, 2, 2, 1};
+ std::vector<float> input_data{
+ -3, -5, 5, 4, 9, -2, // batch = 0
+ -3, -2, -4, 9, -8, 1, // batch = 1
+ };
+ Shape weights_shape{6, 3};
+ std::vector<float> weights_data{
+ -3, -7, 4, // unit = 0
+ -4, -6, 4, // unit = 1
+ 3, 5, 2, // unit = 2
+ 3, -3, -8, // unit = 3
+ -3, 7, 4, // unit = 4
+ 9, 0, -5, // unit = 5
+ };
+ Shape bias_shape{3};
+ std::vector<float> bias_data{-1, -5, -8};
+
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor weights_tensor =
+ makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data, memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ FullyConnectedParams params{};
+ params.activation = Activation::RELU;
+
+ FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
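One entry of the Simple expectation above, checked by hand in a standalone sketch: batch 0 against unit 2 gives dot((-3,-5,5,4,9,-2), (-3,7,4,9,0,-5)) - 8 = 9 - 35 + 20 + 36 + 0 + 10 - 8 = 32, and RELU leaves the positive result unchanged:

#include <cassert>

int main()
{
  const int input[6] = {-3, -5, 5, 4, 9, -2}; // batch 0
  const int unit2[6] = {-3, 7, 4, 9, 0, -5};  // unit 2 weights
  int acc = -8;                               // unit 2 bias
  for (int i = 0; i < 6; ++i)
    acc += input[i] * unit2[i];
  assert(acc == 32); // matches the third output of batch 0
  return 0;
}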
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Gather.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.cpp
new file mode 100644
index 000000000..f1256660f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Gather.h"
+#include "kernels/Utils.h"
+#include "PALGather.h"
+
+#include <stdexcept>
+#include <cassert>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Gather::Gather(const Tensor *params, const Tensor *indices, Tensor *output,
+ const GatherParams &gparams)
+ : KernelWithParams<GatherParams>({params, indices}, {output}, gparams)
+{
+}
+
+void Gather::configure()
+{
+ if (params()->element_type() == DataType::FLOAT32)
+ {
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported type.");
+ }
+
+ LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32 ||
+ indices()->element_type() == DataType::S64);
+
+ // Refer to tensorflow/lite/kernels/gather.cc for this validation and shape logic.
+
+ const Shape &params_shape = params()->shape();
+ const Shape &indices_shape = indices()->shape();
+
+ int axis = _params.axis;
+ if (axis < 0)
+ {
+ axis += params_shape.num_dims();
+ }
+ LUCI_INTERPRETER_CHECK(0 <= axis && axis < params_shape.num_dims());
+
+ int batch_dims = _params.batch_dims;
+ // batch_dims must be in the range [-rank(indices), rank(indices)].
+ // A negative batch_dims is offset by the rank of indices.
+ if (batch_dims < 0)
+ {
+ batch_dims += indices_shape.num_dims();
+ }
+ LUCI_INTERPRETER_CHECK(batch_dims <= axis);
+ LUCI_INTERPRETER_CHECK(0 <= batch_dims && batch_dims < params_shape.num_dims());
+ LUCI_INTERPRETER_CHECK(batch_dims <= indices_shape.num_dims());
+ for (int i = 0; i < batch_dims; ++i)
+ {
+ LUCI_INTERPRETER_CHECK(params_shape.dim(i) == indices_shape.dim(i));
+ }
+
+ const int num_dimensions = params_shape.num_dims() + indices_shape.num_dims() - 1 - batch_dims;
+
+ Shape output_shape(num_dimensions);
+ int output_index = 0;
+ for (int i = 0; i < axis; ++i)
+ {
+ output_shape.dim(output_index++) = params_shape.dim(i);
+ }
+ for (int i = batch_dims; i < indices_shape.num_dims(); ++i)
+ {
+ output_shape.dim(output_index++) = indices_shape.dim(i);
+ }
+ for (int i = axis + 1; i < params_shape.num_dims(); ++i)
+ {
+ output_shape.dim(output_index++) = params_shape.dim(i);
+ }
+ output()->resize(output_shape);
+}
+
+void Gather::execute() const
+{
+ switch (params()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Gather::evalFloat() const
+{
+ assert(indices()->element_type() == DataType::S32 || indices()->element_type() == DataType::S64);
+
+ const auto params_data = getTensorData<float>(params());
+ auto output_data = getTensorData<float>(output());
+
+ tflite::GatherParams tparams;
+ tparams.axis = _params.axis;
+ tparams.batch_dims = _params.batch_dims;
+
+ if (indices()->element_type() == DataType::S32)
+ {
+ const auto indices_data = getTensorData<int32_t>(indices());
+
+ luci_interpreter_pal::Gather<float, int32_t>(tparams, getTensorShape(params()), params_data,
+ getTensorShape(indices()), indices_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ const auto indices_data = getTensorData<int64_t>(indices());
+
+ luci_interpreter_pal::Gather<float, int64_t>(tparams, getTensorShape(params()), params_data,
+ getTensorShape(indices()), indices_data,
+ getTensorShape(output()), output_data);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
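The shape loops in configure above, replayed on the Simple_Batch test data (vectors stand in for Shape; this is an illustration, not the kernel code): params {3, 5}, indices {3, 2}, axis 1, batch_dims 1 gives rank 2 + 2 - 1 - 1 = 2 and output {3, 2}:

#include <cassert>
#include <vector>

std::vector<int> gather_output_shape(const std::vector<int> &params_shape,
                                     const std::vector<int> &indices_shape, int axis,
                                     int batch_dims)
{
  std::vector<int> out;
  for (int i = 0; i < axis; ++i)
    out.push_back(params_shape[i]); // leading params dims (incl. batch dims)
  for (size_t i = static_cast<size_t>(batch_dims); i < indices_shape.size(); ++i)
    out.push_back(indices_shape[i]); // non-batch indices dims
  for (size_t i = static_cast<size_t>(axis) + 1; i < params_shape.size(); ++i)
    out.push_back(params_shape[i]); // trailing params dims
  return out;
}

int main()
{
  const auto out = gather_output_shape({3, 5}, {3, 2}, /*axis=*/1, /*batch_dims=*/1);
  assert((out == std::vector<int>{3, 2}));
  return 0;
}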
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Gather.h b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.h
new file mode 100644
index 000000000..cc02d64fb
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_GATHER_H
+#define LUCI_INTERPRETER_KERNELS_GATHER_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Gather : public KernelWithParams<GatherParams>
+{
+public:
+ Gather(const Tensor *params, const Tensor *indices, Tensor *output, const GatherParams &gparams);
+
+ const Tensor *params() const { return _inputs[0]; }
+ const Tensor *indices() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_GATHER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Gather.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.test.cpp
new file mode 100644
index 000000000..4b3dda708
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Gather.test.cpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Gather.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class GatherTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(GatherTest, Simple)
+{
+ std::vector<float> params_data{1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
+ std::vector<int32_t> indices_data{1, 0, 1, 5};
+ std::vector<float> ref_output_data{2.f, 1.f, 2.f, 6.f};
+
+ Tensor params_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 6}, params_data, _memory_manager.get());
+ Tensor indices_tensor = makeInputTensor<DataType::S32>({4}, indices_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ GatherParams gparams;
+
+ gparams.axis = 1;
+ gparams.batch_dims = 0;
+
+ Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4}));
+}
+
+TEST_F(GatherTest, Simple_Batch)
+{
+ Shape params_shape = {3, 5};
+ Shape indices_shape = {3, 2};
+ std::vector<float> params_data{0., 0., 1., 0., 2., 3., 0., 0., 0., 4., 0., 5., 0., 6., 0.};
+ std::vector<int32_t> indices_data{2, 4, 0, 4, 1, 3};
+ std::vector<float> ref_output_data{1., 2., 3., 4., 5., 6.};
+
+ Tensor params_tensor =
+ makeInputTensor<DataType::FLOAT32>(params_shape, params_data, _memory_manager.get());
+ Tensor indices_tensor =
+ makeInputTensor<DataType::S32>(indices_shape, indices_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ GatherParams gparams;
+
+ gparams.axis = 1;
+ gparams.batch_dims = 1;
+
+ Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 2}));
+}
+
+TEST_F(GatherTest, Simple_NEG)
+{
+ Tensor params_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get());
+ Tensor indices_tensor = makeInputTensor<DataType::S32>({1}, {0}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ GatherParams gparams;
+
+ Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GatherTest, Axis_NEG)
+{
+ Tensor params_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor indices_tensor = makeInputTensor<DataType::S32>({1}, {0}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ GatherParams gparams;
+
+ gparams.axis = 100;
+ gparams.batch_dims = 0;
+
+ Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GatherTest, Batch_NEG)
+{
+ std::vector<float> params_data{1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
+ std::vector<int32_t> indices_data{1, 0, 1, 5};
+ std::vector<float> ref_output_data{2.f, 1.f, 2.f, 6.f};
+
+ Tensor params_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 6}, params_data, _memory_manager.get());
+ Tensor indices_tensor = makeInputTensor<DataType::S32>({4}, indices_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ GatherParams gparams;
+
+ gparams.axis = 0;
+ gparams.batch_dims = 1;
+
+ Gather kernel(&params_tensor, &indices_tensor, &output_tensor, gparams);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Greater.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.cpp
new file mode 100644
index 000000000..5ccae3c38
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Greater.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Greater::Greater(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+void Greater::configure()
+{
+ LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+ if (x()->element_type() == DataType::U8)
+ {
+ quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+ quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+ }
+ output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void Greater::execute() const
+{
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Greater::evalFloat() const
+{
+ const auto x_data = getTensorData<float>(x());
+ const auto y_data = getTensorData<float>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowGreater(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::Greater(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+ y_data, getTensorShape(output()), output_data);
+ }
+}
+
+template <typename T> void Greater::evalInteger() const
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowGreaterNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::GreaterNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
+void Greater::evalQuantized() const
+{
+ const auto x_data = getTensorData<uint8_t>(x());
+ const auto y_data = getTensorData<uint8_t>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.left_shift = 8;
+ op_params.input1_offset = -x()->zero_point(); // Note the '-'
+ op_params.input1_shift = _x_shift;
+ op_params.input1_multiplier = _x_multiplier;
+ op_params.input2_offset = -y()->zero_point(); // Note the '-'
+ op_params.input2_shift = _y_shift;
+ op_params.input2_multiplier = _y_multiplier;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowGreaterWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::GreaterWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
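
Aside: evalQuantized() above compares the two u8 tensors without dequantizing; each operand is offset by its negated zero point, widened, shifted left by 8 bits, and rescaled with the multiplier/shift pair prepared in configure(), so both land in a shared fixed-point domain. A minimal float-domain sketch of the predicate that fixed-point path is equivalent to (illustrative only, not part of the patch):

#include <cstdint>

// Float-equivalent of the quantized Greater comparison: dequantize each
// element with its own (scale, zero_point) and compare the real values.
// evalQuantized() computes the same predicate in pure integer arithmetic.
inline bool greaterDequantized(uint8_t x, float x_scale, int32_t x_zero_point, uint8_t y,
                               float y_scale, int32_t y_zero_point)
{
  const float x_real = x_scale * (static_cast<int32_t>(x) - x_zero_point);
  const float y_real = y_scale * (static_cast<int32_t>(y) - y_zero_point);
  return x_real > y_real;
}
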
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Greater.h b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.h
new file mode 100644
index 000000000..065f76d7b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_GREATER_H
+#define LUCI_INTERPRETER_KERNELS_GREATER_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Greater : public Kernel
+{
+public:
+ Greater(const Tensor *x, const Tensor *y, Tensor *output);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+
+private:
+ int32_t _x_multiplier = 0;
+ int _x_shift = 0;
+ int32_t _y_multiplier = 0;
+ int _y_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_GREATER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Greater.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.test.cpp
new file mode 100644
index 000000000..a48080124
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Greater.test.cpp
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Greater.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class GreaterTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(GreaterTest, FloatSimple)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ false, false, true, // Row 1
+ true, false, false, // Row 2
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(GreaterTest, FloatBroadcast)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, false, true, // Row 1
+ true, false, false, // Row 2
+ false, false, true, // Row 3
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+ std::vector<bool> ref_output_data{false, true, false};
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -4, -3, // Row 3
+ min_value, -2, max_value, // Row 4
+ };
+
+ std::vector<dtype> y_data{
+ min_value + 1, -2, max_value - 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, false, // Row 1
+ true, true, true, // Row 2
+ true, false, false, // Row 3
+ false, false, true, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(GreaterTest, Int32)
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(GreaterTest, Int64)
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+const float F_MIN = -128.0 / 128.0;
+const float F_MAX = 127.0 / 128.0;
+
+TEST_F(GreaterTest, Uint8Quantized)
+{
+ std::vector<float> x_data{
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.6, 0.6, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ false, false, true, true, // Row 1
+ true, false, true, false, // Row 2
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(GreaterTest, Uint8QuantizedRescale)
+{
+ std::vector<float> x_data{
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.6, 0.6, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ false, false, true, true, // Row 1
+ true, false, true, false, // Row 2
+ };
+
+ std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 3);
+
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(GreaterTest, Uint8QuantizedBroadcast)
+{
+ std::vector<float> x_data{
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
+ };
+
+ std::vector<float> y_data{
+ -1, 0.05, 0, 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, true, false, // Row 1
+ true, true, false, false, // Row 2
+ true, false, true, false, // Row 3
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(GreaterTest, Input_Type_Mismatch_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GreaterTest, Input_Output_Type_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GreaterTest, Float_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GreaterTest, Int32_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GreaterTest, Int64_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Greater kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
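
Aside: quantizationParams<uint8_t>(F_MIN, F_MAX) used above maps the float range onto the 256 u8 codes. Assuming the usual affine scheme, real = scale * (q - zero_point), the derivation looks like this (a sketch, not the TestUtils implementation itself):

#include <cmath>
#include <cstdint>
#include <utility>

// With F_MIN = -128/128 and F_MAX = 127/128 the range spans exactly 256
// steps of 1/128, so scale = 1/128 and zero_point = 128 with no rounding
// error; that is what the "exactly 256 units" comment above is about.
std::pair<float, int32_t> affineParamsU8(float min, float max)
{
  const float scale = (max - min) / 255.0f;
  const int32_t zero_point = static_cast<int32_t>(std::round(-min / scale));
  return {scale, zero_point};
}
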
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.cpp
new file mode 100644
index 000000000..27e42c971
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/GreaterEqual.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+GreaterEqual::GreaterEqual(const Tensor *x, const Tensor *y, Tensor *output)
+ : Kernel({x, y}, {output})
+{
+}
+
+void GreaterEqual::configure()
+{
+ LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+ if (x()->element_type() == DataType::U8)
+ {
+ quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+ quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+ }
+ output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void GreaterEqual::execute() const
+{
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void GreaterEqual::evalFloat() const
+{
+ const auto x_data = getTensorData<float>(x());
+ const auto y_data = getTensorData<float>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowGreaterEqual(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::GreaterEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+ y_data, getTensorShape(output()), output_data);
+ }
+}
+
+template <typename T> void GreaterEqual::evalInteger() const
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowGreaterEqualNoScaling(
+ op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+ else
+ {
+ tflite::reference_ops::GreaterEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+}
+
+void GreaterEqual::evalQuantized() const
+{
+ const auto x_data = getTensorData<uint8_t>(x());
+ const auto y_data = getTensorData<uint8_t>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.left_shift = 8;
+ op_params.input1_offset = -x()->zero_point(); // Note the '-'
+ op_params.input1_shift = _x_shift;
+ op_params.input1_multiplier = _x_multiplier;
+ op_params.input2_offset = -y()->zero_point(); // Note the '-'
+ op_params.input2_shift = _y_shift;
+ op_params.input2_multiplier = _y_multiplier;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowGreaterEqualWithScaling(
+ op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+ else
+ {
+ tflite::reference_ops::GreaterEqualWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
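
Aside: configure() sizes the output via calculateShapeForBroadcast, which follows the usual rule: align dimensions from the trailing end, and each pair must match or one side must be 1 (the {2}-vs-{3} *_Broadcast_NEG tests below fail exactly this check). A standalone sketch of that rule (assumed semantics, not the helper itself):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <vector>

// NumPy-style broadcast shape: walk dimensions right to left; a pair is
// compatible if the sizes are equal or one of them is 1, and the result
// takes the larger size.
std::vector<int32_t> broadcastShape(const std::vector<int32_t> &a, const std::vector<int32_t> &b)
{
  const std::size_t num_dims = std::max(a.size(), b.size());
  std::vector<int32_t> result(num_dims);
  for (std::size_t i = 0; i < num_dims; ++i)
  {
    const int32_t da = i < a.size() ? a[a.size() - 1 - i] : 1;
    const int32_t db = i < b.size() ? b[b.size() - 1 - i] : 1;
    if (da != db && da != 1 && db != 1)
      throw std::runtime_error("Shapes are not broadcastable.");
    result[num_dims - 1 - i] = std::max(da, db);
  }
  return result;
}
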
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.h b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.h
new file mode 100644
index 000000000..e333c30a6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_GREATER_EQUAL_H
+#define LUCI_INTERPRETER_KERNELS_GREATER_EQUAL_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class GreaterEqual : public Kernel
+{
+public:
+ GreaterEqual(const Tensor *x, const Tensor *y, Tensor *output);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+
+private:
+ int32_t _x_multiplier = 0;
+ int _x_shift = 0;
+ int32_t _y_multiplier = 0;
+ int _y_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_GREATER_EQUAL_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.test.cpp
new file mode 100644
index 000000000..35bf88eab
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/GreaterEqual.test.cpp
@@ -0,0 +1,333 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/GreaterEqual.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class GreaterEqualTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(GreaterEqualTest, FloatSimple)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, true, // Row 1
+ true, true, false, // Row 2
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(GreaterEqualTest, FloatBroadcast)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, true, // Row 1
+ true, false, false, // Row 2
+ false, false, true, // Row 3
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+ std::vector<bool> ref_output_data{false, true, true};
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -4, -3, // Row 3
+ min_value, -2, max_value - 1, // Row 4
+ };
+
+ std::vector<dtype> y_data{
+ min_value + 1, -2, max_value - 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, false, // Row 1
+ true, true, true, // Row 2
+ true, false, false, // Row 3
+ false, true, true, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(GreaterEqualTest, Int32)
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(GreaterEqualTest, Int64)
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+const float F_MIN = -128.0 / 128.0;
+const float F_MAX = 127.0 / 128.0;
+
+TEST_F(GreaterEqualTest, Uint8Quantized)
+{
+ std::vector<float> x_data{
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.6, 0.55, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, true, true, // Row 1
+ true, false, true, false, // Row 2
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(GreaterEqualTest, Uint8QuantizedRescale)
+{
+ std::vector<float> x_data{
+ 0.5, 0.5, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.5, 0.6, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, true, true, // Row 1
+ true, false, true, false, // Row 2
+ };
+
+ std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5);
+
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(GreaterEqualTest, Uint8QuantizedBroadcast)
+{
+ std::vector<float> x_data{
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
+ };
+
+ std::vector<float> y_data{
+ -1, 0.05, 0, 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, true, false, // Row 1
+ true, true, true, false, // Row 2
+ true, false, true, false, // Row 3
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(GreaterEqualTest, Input_Type_Mismatch_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GreaterEqualTest, Input_Output_Type_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GreaterEqualTest, Float_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GreaterEqualTest, Int32_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(GreaterEqualTest, Int64_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/If.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/If.cpp
new file mode 100644
index 000000000..971708bca
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/If.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/If.h"
+#include "kernels/Utils.h"
+
+#include <cstring>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+static std::vector<const Tensor *> joinInputs(const Tensor *cond,
+ const std::vector<const Tensor *> &inputs)
+{
+ std::vector<const Tensor *> result{cond};
+ result.insert(result.cend(), inputs.cbegin(), inputs.cend());
+ return result;
+}
+
+If::If(const Tensor *cond, const std::vector<const Tensor *> &inputs, std::vector<Tensor *> outputs,
+ RuntimeGraph *then_graph, RuntimeGraph *else_graph)
+ : Kernel(joinInputs(cond, inputs), std::move(outputs)), _then_graph(then_graph),
+ _else_graph(else_graph)
+{
+}
+
+void If::configure()
+{
+ LUCI_INTERPRETER_CHECK(cond()->element_type() == DataType::BOOL);
+ LUCI_INTERPRETER_CHECK(cond()->shape().num_elements() == 1);
+
+ for (RuntimeGraph *graph : {_then_graph, _else_graph})
+ {
+    (void)graph; // Avoid unused-variable warnings when the checks compile to no-ops.
+ LUCI_INTERPRETER_CHECK(graph->getInputTensors().size() == getInputTensors().size() - 1);
+ LUCI_INTERPRETER_CHECK(graph->getOutputTensors().size() == getOutputTensors().size());
+ }
+}
+
+void If::execute() const
+{
+ const bool cond_value = cond()->data<bool>()[0];
+
+ RuntimeGraph *active_graph = cond_value ? _then_graph : _else_graph;
+ const auto &graph_inputs = active_graph->getInputTensors();
+ const auto &graph_outputs = active_graph->getOutputTensors();
+
+ // Copy kernel inputs to active graph inputs.
+ for (size_t i = 0; i < getInputTensors().size() - 1; ++i)
+ {
+ LUCI_INTERPRETER_CHECK(graph_inputs[i]->element_type() == input(i)->element_type());
+ graph_inputs[i]->resize(input(i)->shape());
+
+ const int32_t num_elements = input(i)->shape().num_elements();
+ const std::size_t element_size = getDataTypeSize(input(i)->element_type());
+    // TODO: Think about how to allocate memory for outputs in the main graph
+ active_graph->configureAllocations(graph_inputs[i]);
+ std::memcpy(graph_inputs[i]->data<void>(), input(i)->data<void>(), num_elements * element_size);
+ }
+
+ active_graph->execute();
+
+ // Copy graph outputs to kernel outputs.
+ for (size_t i = 0; i < getOutputTensors().size(); ++i)
+ {
+ LUCI_INTERPRETER_CHECK(graph_outputs[i]->element_type() == output(i)->element_type());
+ output(i)->resize(graph_outputs[i]->shape());
+    // TODO: Think about how to allocate memory for outputs in the main graph
+ active_graph->configureAllocations(output(i));
+
+ const int32_t num_elements = output(i)->shape().num_elements();
+ const std::size_t element_size = getDataTypeSize(output(i)->element_type());
+ std::memcpy(output(i)->data<void>(), graph_outputs[i]->data<void>(),
+ num_elements * element_size);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
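
Aside: execute() implements the branch as copy-in / run / copy-out: the selected subgraph's inputs are resized to the kernel inputs and filled byte for byte, the subgraph runs, and its outputs are copied back the same way. A reduced sketch of that pattern, using a hypothetical Graph/Buffer surface in place of RuntimeGraph (illustrative only, not part of the patch):

#include <cstddef>
#include <cstring>
#include <vector>

// Hypothetical stand-ins for the RuntimeGraph surface that If::execute() uses.
struct Buffer
{
  void *data;
  std::size_t size_bytes;
};

struct Graph
{
  std::vector<Buffer> inputs;
  std::vector<Buffer> outputs;
  void (*run)(Graph &);
};

// Copy-in / run / copy-out, selecting the branch from a scalar condition;
// assumes each destination buffer was already sized to match its source.
void runIf(bool cond, Graph &then_graph, Graph &else_graph,
           const std::vector<Buffer> &kernel_inputs, std::vector<Buffer> &kernel_outputs)
{
  Graph &active = cond ? then_graph : else_graph;
  for (std::size_t i = 0; i < kernel_inputs.size(); ++i)
    std::memcpy(active.inputs[i].data, kernel_inputs[i].data, kernel_inputs[i].size_bytes);
  active.run(active);
  for (std::size_t i = 0; i < kernel_outputs.size(); ++i)
    std::memcpy(kernel_outputs[i].data, active.outputs[i].data, active.outputs[i].size_bytes);
}
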
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/If.h b/compiler/luci-micro/luci-interpreter/src/kernels/If.h
new file mode 100644
index 000000000..fa6ab371a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/If.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_IF_H
+#define LUCI_INTERPRETER_KERNELS_IF_H
+
+#include "core/Kernel.h"
+#include "core/RuntimeGraph.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class If : public Kernel
+{
+public:
+ If(const Tensor *cond, const std::vector<const Tensor *> &inputs, std::vector<Tensor *> outputs,
+ RuntimeGraph *then_graph, RuntimeGraph *else_graph);
+
+ const Tensor *cond() const { return _inputs[0]; }
+ const Tensor *input(int index) const { return _inputs[1 + index]; }
+ Tensor *output(int index) const { return _outputs[index]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ RuntimeGraph *const _then_graph;
+ RuntimeGraph *const _else_graph;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_IF_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/If.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/If.test.cpp
new file mode 100644
index 000000000..c5f4faf75
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/If.test.cpp
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "core/RuntimeModule.h"
+#include "kernels/Add.h"
+#include "kernels/If.h"
+#include "kernels/Mul.h"
+#include "kernels/TestUtils.h"
+
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class IfTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+RuntimeGraph *buildAddSubgraph(RuntimeModule *module, IMemoryManager *memory_manager)
+{
+ RuntimeGraph *graph = module->addGraph(memory_manager);
+ Tensor *input1 = graph->addTensor(
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ Tensor *input2 = graph->addTensor(
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ Tensor *output = graph->addTensor(
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+
+ memory_manager->allocate_memory(*input1);
+ memory_manager->allocate_memory(*input2);
+ memory_manager->allocate_memory(*output);
+
+ graph->setInputTensors({input1, input2});
+ graph->setOutputTensors({output});
+
+ AddParams params{};
+ params.activation = Activation::NONE;
+ graph->addKernel(std::make_unique<Add>(input1, input2, output, params));
+
+ return graph;
+}
+
+RuntimeGraph *buildMulSubgraph(RuntimeModule *module, IMemoryManager *memory_manager)
+{
+ RuntimeGraph *graph = module->addGraph(memory_manager);
+ Tensor *input1 = graph->addTensor(
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ Tensor *input2 = graph->addTensor(
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+ Tensor *output = graph->addTensor(
+ std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+
+ memory_manager->allocate_memory(*input1);
+ memory_manager->allocate_memory(*input2);
+ memory_manager->allocate_memory(*output);
+
+ graph->setInputTensors({input1, input2});
+ graph->setOutputTensors({output});
+
+ MulParams params{};
+ params.activation = Activation::NONE;
+ graph->addKernel(std::make_unique<Mul>(input1, input2, output, params));
+
+ return graph;
+}
+
+TEST_F(IfTest, CondTrue)
+{
+ Tensor cond = makeInputTensor<DataType::BOOL>({1}, {true}, _memory_manager.get());
+ Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+ Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
+ Tensor output = makeOutputTensor(DataType::FLOAT32);
+
+ RuntimeModule module(nullptr);
+ RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+ RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
+
+ If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
+ kernel.configure();
+ _memory_manager->allocate_memory(output);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({6, 9}));
+}
+
+TEST_F(IfTest, CondFalse)
+{
+ Tensor cond = makeInputTensor<DataType::BOOL>({1}, {false}, _memory_manager.get());
+ Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+ Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
+ Tensor output = makeOutputTensor(DataType::FLOAT32);
+
+ RuntimeModule module(nullptr);
+ RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+ RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
+
+ If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
+ kernel.configure();
+ _memory_manager->allocate_memory(output);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({5, 14}));
+}
+
+TEST_F(IfTest, InvalidCondType_NEG)
+{
+ Tensor cond = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+ Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+ Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
+ Tensor output = makeOutputTensor(DataType::FLOAT32);
+
+ RuntimeModule module(nullptr);
+ RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+ RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
+
+ If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(IfTest, InvalidCondElementNum_NEG)
+{
+ Tensor cond = makeInputTensor<DataType::BOOL>({2}, {false, true}, _memory_manager.get());
+ Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}, _memory_manager.get());
+ Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}, _memory_manager.get());
+ Tensor output = makeOutputTensor(DataType::FLOAT32);
+
+ RuntimeModule module(nullptr);
+ RuntimeGraph *then_graph = buildAddSubgraph(&module, _memory_manager.get());
+ RuntimeGraph *else_graph = buildMulSubgraph(&module, _memory_manager.get());
+
+ If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.cpp
new file mode 100644
index 000000000..22a329be6
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/InstanceNorm.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/common.h>
+#include <cmath>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+InstanceNorm::InstanceNorm(const Tensor *input, const Tensor *gamma, const Tensor *beta,
+ Tensor *output, const InstanceNormParams &params)
+ : KernelWithParams<InstanceNormParams>({input, gamma, beta}, {output}, params)
+{
+}
+
+void InstanceNorm::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(gamma()->element_type() == input()->element_type());
+ LUCI_INTERPRETER_CHECK(gamma()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(gamma()->shape().dim(0) == input()->shape().dim(3) ||
+ gamma()->shape().dim(0) == 1);
+ LUCI_INTERPRETER_CHECK(beta()->element_type() == input()->element_type());
+ LUCI_INTERPRETER_CHECK(beta()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(beta()->shape().dim(0) == input()->shape().dim(3) ||
+ beta()->shape().dim(0) == 1);
+ output()->resize(input()->shape());
+}
+
+void InstanceNorm::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void InstanceNorm::evalFloat() const
+{
+ float activation_min, activation_max;
+ calculateActivationRange(params().activation, &activation_min, &activation_max);
+ auto input_shape = getTensorShape(input());
+ auto output_shape = getTensorShape(output());
+ const int32_t batches = tflite::MatchingDim(input_shape, 0, output_shape, 0);
+ const int32_t heights = tflite::MatchingDim(input_shape, 1, output_shape, 1);
+ const int32_t widths = tflite::MatchingDim(input_shape, 2, output_shape, 2);
+ const int32_t channels = tflite::MatchingDim(input_shape, 3, output_shape, 3);
+ const float *input_data = getTensorData<float>(input());
+ const float *gamma_data = getTensorData<float>(gamma());
+ auto gamma_shape = getTensorShape(gamma());
+ bool single_gamma = gamma_shape.DimensionsCount() == 1 && gamma_shape.Dims(0) == 1;
+ const float *beta_data = getTensorData<float>(beta());
+ auto beta_shape = getTensorShape(beta());
+ bool single_beta = beta_shape.DimensionsCount() == 1 && beta_shape.Dims(0) == 1;
+ float *output_data = getTensorData<float>(output());
+ for (int32_t batch = 0; batch < batches; batch++)
+ {
+ for (int32_t channel = 0; channel < channels; channel++)
+ {
+      double sum = 0.0;
+      double square_sum = 0.0;
+ int32_t size = heights * widths;
+ for (int32_t height = 0; height < heights; height++)
+ {
+ for (int32_t width = 0; width < widths; width++)
+ {
+ double input_val = input_data[tflite::Offset(input_shape, batch, height, width, channel)];
+ sum += input_val;
+ square_sum += (input_val * input_val);
+ }
+ }
+ double mean = sum / size;
+ double var = square_sum / size - mean * mean;
+
+ double gamma = single_gamma ? gamma_data[0] : gamma_data[channel];
+ double beta = single_beta ? beta_data[0] : beta_data[channel];
+ double a = gamma / (std::sqrt(var + params().epsilon));
+ double b = -mean * a + beta;
+
+ for (int32_t height = 0; height < heights; height++)
+ {
+ for (int32_t width = 0; width < widths; width++)
+ {
+          double input_value =
+            input_data[tflite::Offset(input_shape, batch, height, width, channel)];
+ double output_value = input_value * a + b;
+ output_data[tflite::Offset(output_shape, batch, height, width, channel)] =
+ tflite::ActivationFunctionWithMinMax((float)output_value, activation_min,
+ activation_max);
+ }
+ }
+ }
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
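
Aside: evalFloat() folds the normalization into one affine transform per (batch, channel): with a = gamma / sqrt(var + eps) and b = beta - mean * a, the inner loop computes y = a * x + b, which equals the textbook gamma * (x - mean) / sqrt(var + eps) + beta. A tiny standalone check of that identity (illustrative only, not part of the patch):

#include <cassert>
#include <cmath>

// The folded affine form used by InstanceNorm::evalFloat() matches the
// textbook instance-norm expression for any element.
int main()
{
  const double x = 3.0, mean = 2.0, var = 4.0, eps = 0.1, gamma = 1.5, beta = 0.5;

  const double a = gamma / std::sqrt(var + eps);
  const double b = -mean * a + beta;
  const double folded = x * a + b;

  const double textbook = gamma * (x - mean) / std::sqrt(var + eps) + beta;

  assert(std::fabs(folded - textbook) < 1e-12);
  return 0;
}
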
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.h b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.h
new file mode 100644
index 000000000..a70a84e0a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_INSTANCENORM_H
+#define LUCI_INTERPRETER_KERNELS_INSTANCENORM_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class InstanceNorm : public KernelWithParams<InstanceNormParams>
+{
+public:
+ InstanceNorm(const Tensor *input, const Tensor *gamma, const Tensor *beta, Tensor *output,
+ const InstanceNormParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *gamma() const { return _inputs[1]; }
+ const Tensor *beta() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_INSTANCENORM_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.test.cpp
new file mode 100644
index 000000000..04400c3c0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/InstanceNorm.test.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "kernels/InstanceNorm.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class InstanceNormTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(InstanceNormTest, Simple)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 2, 1}, {1, 1, 1, 1}, _memory_manager.get());
+ Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+ Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ InstanceNormParams params{};
+ params.epsilon = 0.1f;
+ params.activation = Activation::NONE;
+
+ InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear({2, 2, 2, 2}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 1}));
+}
+
+TEST_F(InstanceNormTest, Single_gamma_beta)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 1, 2}, {1, 1, 1, 1}, _memory_manager.get());
+ Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}, _memory_manager.get());
+ Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ InstanceNormParams params{};
+ params.epsilon = 0.1f;
+ params.activation = Activation::NONE;
+
+ InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear({2, 2, 2, 2}));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 2}));
+}
+
+TEST_F(InstanceNormTest, Wrong_gamma_beta_dim_NEG)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 1, 2}, {1, 1, 1, 1}, _memory_manager.get());
+ Tensor gamma_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1, 1, 1}, _memory_manager.get());
+ Tensor beta_tensor = makeInputTensor<DataType::FLOAT32>({3}, {2, 2, 2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ InstanceNormParams params{};
+ params.epsilon = 0.1f;
+ params.activation = Activation::NONE;
+
+ InstanceNorm kernel(&input_tensor, &gamma_tensor, &beta_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.cpp
new file mode 100644
index 000000000..64222953f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/L2Normalize.h"
+#include "kernels/Utils.h"
+
+#include "PALL2Normalize.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+L2Normalize::L2Normalize(const Tensor *input, Tensor *output, const L2NormParams &params)
+ : KernelWithParams<L2NormParams>({input}, {output}, params)
+{
+}
+
+void L2Normalize::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= 4);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32 ||
+ output()->element_type() == DataType::U8);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ if (output()->element_type() == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(output()->scale() == (1. / 128.));
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == 128);
+ }
+ LUCI_INTERPRETER_CHECK(params().activation == Activation::NONE);
+ output()->resize(input()->shape());
+}
+
+void L2Normalize::execute() const
+{
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
+ eval<float>(0);
+ break;
+ case DataType::U8:
+ eval<uint8_t>(input()->zero_point());
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> void L2Normalize::eval(int32_t zero_point) const
+{
+ tflite::L2NormalizationParams op_params{};
+ op_params.input_zero_point = zero_point;
+ luci_interpreter_pal::L2Normalization(op_params, getTensorShape(input()),
+ getTensorData<T>(input()), getTensorShape(output()),
+ getTensorData<T>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
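The hard-coded U8 quantization parameters enforced in configure() follow from the value range of the operation: every component of an L2-normalized vector lies in [-1, 1], and the affine mapping real = scale * (q - zero_point) with scale = 1/128 and zero_point = 128 maps q in [0, 255] onto [-1, 127/128], covering that range with no wasted codes. Any other (scale, zero_point) pair would either clip or lose resolution, hence the fixed check in configure().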
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.h b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.h
new file mode 100644
index 000000000..6c7dac698
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_L2NORMALIZE_H
+#define LUCI_INTERPRETER_KERNELS_L2NORMALIZE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class L2Normalize : public KernelWithParams<L2NormParams>
+{
+public:
+ L2Normalize(const Tensor *input, Tensor *output, const L2NormParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void eval(int32_t zero_point) const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_L2NORMALIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.test.cpp
new file mode 100644
index 000000000..6f960e8b4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Normalize.test.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "kernels/L2Normalize.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ L2NormParams params{};
+ params.activation = Activation::NONE;
+
+ L2Normalize kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data,
+ std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::pair<float, int32_t> quant_param =
+ quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f,
+ std::max(input_data) > 0 ? std::max(input_data) : 0.f);
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 128., 128);
+
+ L2NormParams params{};
+ params.activation = Activation::NONE;
+
+ L2Normalize kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, output_tensor.scale()));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <typename T> class L2NormalizeTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(L2NormalizeTest, DataTypes);
+
+TYPED_TEST(L2NormalizeTest, Simple)
+{
+ Check<TypeParam>({1, 1, 1, 6}, {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1},
+ {-0.55, 0.3, 0.35, 0.6, -0.35, 0.05});
+}
+
+TEST(L2NormalizeTest, ActivationType_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ L2NormParams params{};
+ params.activation = Activation::RELU6;
+
+ L2Normalize kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(L2NormalizeTest, InvalidOutputQuantParam_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 1, 1, 6}, 1. / 64., 127, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 64., 127);
+
+ L2NormParams params{};
+ params.activation = Activation::NONE;
+
+ L2Normalize kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
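The reference values in the typed Simple test above can be verified by hand: the sum of squares of {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1} is 1.21 + 0.36 + 0.49 + 1.44 + 0.49 + 0.01 = 4.0, so the L2 norm is 2 and each output element is the corresponding input divided by 2, giving {-0.55, 0.3, 0.35, 0.6, -0.35, 0.05}.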
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.cpp
new file mode 100644
index 000000000..5a88808d5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/L2Pool2D.h"
+
+#include "kernels/Utils.h"
+
+#include "PALL2Pool2D.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+L2Pool2D::L2Pool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
+ : KernelWithParams<Pool2DParams>({input}, {output}, params)
+{
+}
+
+void L2Pool2D::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+ int batches = input()->shape().dim(0);
+ int height = input()->shape().dim(1);
+ int width = input()->shape().dim(2);
+ int channels_out = input()->shape().dim(3);
+
+ // Matching GetWindowedOutputSize in TensorFlow.
+ auto padding = params().padding;
+ int out_width, out_height;
+ out_width = computeOutputSize(padding, width, params().filter_width, params().stride_width, 1);
+ out_height =
+ computeOutputSize(padding, height, params().filter_height, params().stride_height, 1);
+ _padding_width =
+ computePadding(params().stride_width, 1, width, params().filter_width, out_width);
+ _padding_height =
+ computePadding(params().stride_height, 1, height, params().filter_height, out_height);
+
+ LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32);
+ output()->resize({batches, out_height, out_width, channels_out});
+}
+
+void L2Pool2D::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ float activation_min, activation_max;
+ calculateActivationRange(params().activation, &activation_min, &activation_max);
+ tflite::PoolParams op_params;
+ op_params.stride_height = params().stride_height;
+ op_params.stride_width = params().stride_width;
+ op_params.filter_height = params().filter_height;
+ op_params.filter_width = params().filter_width;
+ op_params.padding_values.height = _padding_height;
+ op_params.padding_values.width = _padding_width;
+ op_params.float_activation_min = activation_min;
+ op_params.float_activation_max = activation_max;
+ luci_interpreter_pal::L2Pool(op_params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
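L2 pooling computes the root mean square over each window:

\[ y = \sqrt{\frac{1}{N} \sum_{i=1}^{N} x_i^2} \]

For the FloatNone test further below (2x2 filter, stride 2), the first window {0, 6, 3, 2} gives sqrt((0 + 36 + 9 + 4) / 4) = sqrt(12.25) = 3.5 and the second window {2, 4, 10, 7} gives sqrt((4 + 16 + 100 + 49) / 4) = sqrt(42.25) = 6.5, matching ref_output_data.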
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.h
new file mode 100644
index 000000000..d40f5f478
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_L2POOL2D_H
+#define LUCI_INTERPRETER_KERNELS_L2POOL2D_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <memory>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class L2Pool2D : public KernelWithParams<Pool2DParams>
+{
+public:
+ L2Pool2D(const Tensor *input, Tensor *output, const Pool2DParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ int32_t _padding_height = 0;
+ int32_t _padding_width = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_L2POOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.test.cpp
new file mode 100644
index 000000000..7245456cb
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/L2Pool2D.test.cpp
@@ -0,0 +1,291 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/L2Pool2D.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class L2Pool2DTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(L2Pool2DTest, FloatNone)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::NONE;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{3.5, 6.5};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ // TODO: check the Shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, FloatRelu)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ -1, -6, 2, 4, //
+ -3, -2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::RELU;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{3.53553, 6.5};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ // TODO: check the Shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, FloatRelu1)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ -0.1, -0.6, 2, 4, //
+ -0.3, -0.2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::RELU_N1_TO_1;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.353553, 1.0};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ // TODO: check the Shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, FloatRelu6)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ -0.1, -0.6, 2, 4, //
+ -0.3, -0.2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::RELU6;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.353553, 6.0};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ // TODO: check the Shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, FloatPaddingSame)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::SAME;
+ params.activation = Activation::NONE;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{3.5, 6.5};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ // TODO: check the Shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, FloatPaddingSameStride)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::SAME;
+ params.activation = Activation::NONE;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{3.5, 6.0, 6.5, 5.70088, 2.54951, 7.2111, 8.63134, 7.0};
+ // NOTE: with NEON+ruy, the errors are #1=-1.14441e-05 and #6=-1.81198e-05.
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, 1.0e-4f));
+ // TODO: check the Shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, FloatPaddingValidStride)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::NONE;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{3.5, 6.0, 6.5};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ // TODO: check the Shape of output_tensor.
+}
+
+TEST_F(L2Pool2DTest, InvalidInputShape_NEG)
+{
+ Shape input_shape{1, 2, 4};
+ std::vector<float> input_data{
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::NONE;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(L2Pool2DTest, InvalidInputOutputType_NEG)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.activation = Activation::NONE;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+
+ L2Pool2D kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.cpp
new file mode 100644
index 000000000..3833a55e8
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LeakyRelu.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/leaky_relu.h>
+
+#include "PALLeakyRelu.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+LeakyRelu::LeakyRelu(const Tensor *input, Tensor *output, const LeakyReluParams &params)
+ : KernelWithParams<LeakyReluParams>({input}, {output}, params)
+{
+}
+
+void LeakyRelu::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ if (input()->element_type() == DataType::U8)
+ {
+ double alpha_multiplier = input()->scale() * params().alpha / output()->scale();
+ quantizeMultiplier(alpha_multiplier, &_output_multiplier_alpha, &_output_shift_alpha);
+ double identity_multiplier = input()->scale() / output()->scale();
+ quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
+ }
+ output()->resize(input()->shape());
+}
+
+void LeakyRelu::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void LeakyRelu::evalFloat() const
+{
+ tflite::LeakyReluParams op_params{};
+ op_params.alpha = params().alpha;
+ luci_interpreter_pal::LeakyRelu(op_params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void LeakyRelu::evalQuantized() const
+{
+ tflite::LeakyReluParams op_params{};
+ op_params.input_offset = input()->zero_point();
+ op_params.output_offset = output()->zero_point();
+ op_params.output_multiplier_alpha = _output_multiplier_alpha;
+ op_params.output_shift_alpha = _output_shift_alpha;
+ op_params.output_multiplier_identity = _output_multiplier_identity;
+ op_params.output_shift_identity = _output_shift_identity;
+
+ tflite::reference_ops::QuantizeLeakyRelu(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
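quantizeMultiplier() comes from kernels/Utils.h and is not part of this diff; below is a minimal stand-alone sketch of its assumed TFLite-style semantics (illustrative only, not the project's implementation):

#include <cmath>
#include <cstdint>

// Decompose a real multiplier M into q31 * 2^shift, where q31 is a Q31
// fixed-point value whose magnitude lies in [0.5, 1).
void quantizeMultiplierSketch(double real_multiplier, int32_t *quantized_multiplier, int *shift)
{
  if (real_multiplier == 0.0)
  {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }
  const double significand = std::frexp(real_multiplier, shift); // |significand| in [0.5, 1)
  auto q = static_cast<int64_t>(std::round(significand * (1LL << 31)));
  if (q == (1LL << 31)) // rounding can push the significand up to 1.0; renormalize
  {
    q /= 2;
    ++(*shift);
  }
  *quantized_multiplier = static_cast<int32_t>(q);
}

evalQuantized() passes the resulting (multiplier, shift) pairs to the TFLite reference kernel, which applies them as an integer multiply-and-shift in place of a floating-point multiply.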
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.h b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.h
new file mode 100644
index 000000000..e66f404df
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LEAKYRELU_H
+#define LUCI_INTERPRETER_KERNELS_LEAKYRELU_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LeakyRelu : public KernelWithParams<LeakyReluParams>
+{
+public:
+ LeakyRelu(const Tensor *input, Tensor *output, const LeakyReluParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+
+private:
+ int32_t _output_multiplier_alpha = 0;
+ int _output_shift_alpha = 0;
+ int32_t _output_multiplier_identity = 0;
+ int _output_shift_identity = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LEAKYRELU_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.test.cpp
new file mode 100644
index 000000000..0f6263b57
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LeakyRelu.test.cpp
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LeakyRelu.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data,
+ float alpha)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ LeakyReluParams params{};
+ params.alpha = alpha;
+
+ LeakyRelu kernel(&input_tensor, &output_tensor, params);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+}
+
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data,
+ std::initializer_list<float> output_data, float alpha)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ const float quantized_tolerance = getTolerance(-8, 127.f / 16.f, 255);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-8, 127.f / 16.f);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ LeakyReluParams params{};
+ params.alpha = alpha;
+
+ LeakyRelu kernel(&input_tensor, &output_tensor, params);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, quantized_tolerance));
+}
+
+template <typename T> class LeakyReluTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(LeakyReluTest, DataTypes);
+
+TYPED_TEST(LeakyReluTest, Simple)
+{
+ Check<TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3},
+ /*input_data=*/
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
+ },
+ /*output_data=*/
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -0.5f, -1.0f, // Row 2
+ },
+ /*alpha=*/0.5f);
+
+ SUCCEED();
+}
+
+TEST(LeakyReluTest, InvalidInputOutputType_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3},
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
+ },
+ memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ LeakyReluParams params{};
+ params.alpha = 0.5f;
+
+ LeakyRelu kernel(&input_tensor, &output_tensor, params);
+
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
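Assuming getTolerance(min, max, quantize_steps) returns (max - min) / quantize_steps, as its arguments suggest, the tolerance in the quantized Check specialization above is exactly one quantization step: (127/16 + 8) / 255 = 15.9375 / 255 = 1/16, the same step size quantizationParams<uint8_t>(-8, 127/16) produces (scale = 1/16, zero_point = 128). Dequantized results may therefore differ from the float reference by at most one grid step.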
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Less.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Less.cpp
new file mode 100644
index 000000000..8d26ff297
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Less.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Less.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Less::Less(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+void Less::configure()
+{
+ LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+ if (x()->element_type() == DataType::U8)
+ {
+ quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+ quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+ }
+ output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void Less::execute() const
+{
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Less::evalFloat() const
+{
+ const auto x_data = getTensorData<float>(x());
+ const auto y_data = getTensorData<float>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowLess(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::Less(op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+}
+
+template <typename T> void Less::evalInteger() const
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowLessNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::LessNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
+void Less::evalQuantized() const
+{
+ const auto x_data = getTensorData<uint8_t>(x());
+ const auto y_data = getTensorData<uint8_t>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.left_shift = 8;
+ op_params.input1_offset = -x()->zero_point(); // Note the '-'
+ op_params.input1_shift = _x_shift;
+ op_params.input1_multiplier = _x_multiplier;
+ op_params.input2_offset = -y()->zero_point(); // Note the '-'
+ op_params.input2_shift = _y_shift;
+ op_params.input2_multiplier = _y_multiplier;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowLessWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::LessWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
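For the quantized path, both operands are rescaled onto a common grid before comparing (a sketch of the assumed TFLite reference semantics): each input is centered and amplified as (x_q - z_x) << 8, then multiplied by its own fixed-point (multiplier, shift) pair from configure(), so that

\[ \tilde{x} \approx 2^{8} \cdot x_{\text{real}}, \qquad \tilde{y} \approx 2^{8} \cdot y_{\text{real}} \]

and the boolean output is simply \( \tilde{x} < \tilde{y} \). Because each side carries its own multiplier, inputs with different quantization parameters compare correctly; the Uint8QuantizedRescale test below exercises exactly that.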
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Less.h b/compiler/luci-micro/luci-interpreter/src/kernels/Less.h
new file mode 100644
index 000000000..e27bb689c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Less.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LESS_H
+#define LUCI_INTERPRETER_KERNELS_LESS_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Less : public Kernel
+{
+public:
+ Less(const Tensor *x, const Tensor *y, Tensor *output);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+
+private:
+ int32_t _x_multiplier = 0;
+ int _x_shift = 0;
+ int32_t _y_multiplier = 0;
+ int _y_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LESS_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Less.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Less.test.cpp
new file mode 100644
index 000000000..8c5963363
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Less.test.cpp
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Less.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LessTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LessTest, FloatSimple)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, false, // Row 1
+ false, false, true, // Row 2
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(LessTest, FloatBroadcast)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, false, // Row 1
+ false, true, true, // Row 2
+ true, true, false, // Row 3
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+ std::vector<bool> ref_output_data{true, false, false};
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -4, -3, // Row 3
+ min_value, -2, max_value, // Row 4
+ };
+
+ std::vector<dtype> y_data{
+ min_value + 1, -2, max_value - 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, true, // Row 1
+ false, false, false, // Row 2
+ false, true, true, // Row 3
+ true, false, false, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(LessTest, Int32)
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(LessTest, Int64)
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+const float F_MIN = -128.0 / 128.0;
+const float F_MAX = 127.0 / 128.0;
+
+TEST_F(LessTest, Uint8Quantized)
+{
+ std::vector<float> x_data{
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.6, 0.55, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, false, false, // Row 1
+ false, true, false, true, // Row 2
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(LessTest, Uint8QuantizedRescale)
+{
+ std::vector<float> x_data{
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.6, 0.6, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, false, false, // Row 1
+ false, true, false, true, // Row 2
+ };
+
+ std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5);
+
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(LessTest, Uint8QuantizedBroadcast)
+{
+ std::vector<float> x_data{
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
+ };
+
+ std::vector<float> y_data{
+ -1, 0.05, 0, 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, false, true, // Row 1
+ false, false, false, true, // Row 2
+ false, true, false, true, // Row 3
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(LessTest, Input_Type_Mismatch_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessTest, Input_Output_Type_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessTest, Float_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessTest, Int32_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessTest, Int64_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Less kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
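Working out the parameters implied by F_MIN and F_MAX: scale = (F_MAX - F_MIN) / 255 = (255/128) / 255 = 1/128 and zero_point = -F_MIN / scale = 128, so the 256 uint8 codes tile [-1, 127/128] in exact steps of 1/128; these are the "exactly 256 units" the comment above refers to.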
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.cpp
new file mode 100644
index 000000000..b474bc47a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LessEqual.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+LessEqual::LessEqual(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+void LessEqual::configure()
+{
+ LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
+ if (x()->element_type() == DataType::U8)
+ {
+ quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+ quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+ }
+ output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void LessEqual::execute() const
+{
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void LessEqual::evalFloat() const
+{
+ const auto x_data = getTensorData<float>(x());
+ const auto y_data = getTensorData<float>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowLessEqual(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::LessEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+ y_data, getTensorShape(output()), output_data);
+ }
+}
+
+template <typename T> void LessEqual::evalInteger() const
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowLessEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::LessEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
+void LessEqual::evalQuantized() const
+{
+ const auto x_data = getTensorData<uint8_t>(x());
+ const auto y_data = getTensorData<uint8_t>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.left_shift = 8;
+ op_params.input1_offset = -x()->zero_point(); // Note the '-'
+ op_params.input1_shift = _x_shift;
+ op_params.input1_multiplier = _x_multiplier;
+ op_params.input2_offset = -y()->zero_point(); // Note the '-'
+ op_params.input2_shift = _y_shift;
+ op_params.input2_multiplier = _y_multiplier;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowLessEqualWithScaling(
+ op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+ else
+ {
+ tflite::reference_ops::LessEqualWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.h b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.h
new file mode 100644
index 000000000..f82ea90d4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LESS_EQUAL_H
+#define LUCI_INTERPRETER_KERNELS_LESS_EQUAL_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LessEqual : public Kernel
+{
+public:
+ LessEqual(const Tensor *x, const Tensor *y, Tensor *output);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+
+private:
+ int32_t _x_multiplier = 0;
+ int _x_shift = 0;
+ int32_t _y_multiplier = 0;
+ int _y_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LESS_EQUAL_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.test.cpp
new file mode 100644
index 000000000..b2e2fa7a1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LessEqual.test.cpp
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LessEqual.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LessEqualTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LessEqualTest, FloatSimple)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, true, false, // Row 1
+ false, true, true, // Row 2
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(LessEqualTest, FloatBroadcast)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, true, false, // Row 1
+ false, true, true, // Row 2
+ true, true, false, // Row 3
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value + 1, -2, max_value};
+
+ std::vector<bool> ref_output_data{true, false, true};
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -4, -3, // Row 3
+ min_value, -2, max_value, // Row 4
+ };
+
+ std::vector<dtype> y_data{
+ min_value + 1, -2, max_value - 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, true, // Row 1
+ false, false, false, // Row 2
+ false, true, true, // Row 3
+ true, true, false, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(LessEqualTest, Int32)
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(LessEqualTest, Int64)
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+const float F_MIN = -128.0 / 128.0;
+const float F_MAX = 127.0 / 128.0;
+
+TEST_F(LessEqualTest, Uint8Quantized)
+{
+ std::vector<float> x_data{
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.6, 0.55, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, true, false, false, // Row 1
+ false, true, false, true, // Row 2
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(LessEqualTest, Uint8QuantizedRescale)
+{
+ std::vector<float> x_data{
+ 0.5, 0.6, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.6, 0.6, 0.5, // Row 1
+ -1, 0.05, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, true, false, false, // Row 1
+ false, true, false, true, // Row 2
+ };
+
+ std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5);
+
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(LessEqualTest, Uint8QuantizedBroadcast)
+{
+ std::vector<float> x_data{
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
+ };
+
+ std::vector<float> y_data{
+ -1, 0.05, 0, 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, false, true, // Row 1
+ false, false, true, true, // Row 2
+ false, true, false, true, // Row 3
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 3, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(LessEqualTest, Input_Type_Mismatch_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessEqualTest, Input_Output_Type_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessEqualTest, Float_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessEqualTest, Int32_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LessEqualTest, Int64_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LessEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp
new file mode 100644
index 000000000..a2bf442b0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LocalResponseNormalization.h"
+
+#include "kernels/Utils.h"
+
+#include "PALLocalResponseNormalization.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+LocalResponseNormalization::LocalResponseNormalization(
+ const Tensor *input, Tensor *output, const LocalResponseNormalizationParams &params)
+ : KernelWithParams<LocalResponseNormalizationParams>({input}, {output}, params)
+{
+}
+
+void LocalResponseNormalization::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ output()->resize(input()->shape());
+}
+
+void LocalResponseNormalization::execute() const
+{
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
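+      // Note: the TFLite reference kernel calls the window radius "range"; the
+      // remaining parameters map over one-to-one.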
+ tflite::LocalResponseNormalizationParams op_params;
+ op_params.range = params().radius;
+ op_params.bias = params().bias;
+ op_params.alpha = params().alpha;
+ op_params.beta = params().beta;
+ luci_interpreter_pal::LocalResponseNormalization(
+ op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.h b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.h
new file mode 100644
index 000000000..60408a104
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOCALRESPONSENORMALIZATION_H
+#define LUCI_INTERPRETER_KERNELS_LOCALRESPONSENORMALIZATION_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LocalResponseNormalization : public KernelWithParams<LocalResponseNormalizationParams>
+{
+public:
+ LocalResponseNormalization(const Tensor *input, Tensor *output,
+ const LocalResponseNormalizationParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOCALRESPONSENORMALIZATION_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp
new file mode 100644
index 000000000..4a9d4739f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LocalResponseNormalization.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LocalResponseNormalizationTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LocalResponseNormalizationTest, SameAsL2Norm)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 20;
+ params.bias = 0.0;
+ params.alpha = 1.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({-0.55, 0.3, 0.35, 0.6, -0.35, 0.05}));
+}
+
+TEST_F(LocalResponseNormalizationTest, WithAlpha)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 20;
+ params.bias = 0.0;
+ params.alpha = 4.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({-0.275, 0.15, 0.175, 0.3, -0.175, 0.025}));
+}
+
+TEST_F(LocalResponseNormalizationTest, WithBias)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 20;
+ params.bias = 9.0;
+ params.alpha = 4.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({-0.22, 0.12, 0.14, 0.24, -0.14, 0.02}));
+}
+
+TEST_F(LocalResponseNormalizationTest, SmallRadius)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 2;
+ params.bias = 9.0;
+ params.alpha = 4.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ FloatArrayNear({-0.264926, 0.125109, 0.140112, 0.267261, -0.161788, 0.0244266}));
+}
+
+TEST_F(LocalResponseNormalizationTest, InvalidInputDimension_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 20;
+ params.bias = 0.0;
+ params.alpha = 1.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LocalResponseNormalizationTest, InvalidInputOutputType_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 20;
+ params.bias = 0.0;
+ params.alpha = 1.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.cpp
new file mode 100644
index 000000000..79c315338
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogSoftmax.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/log_softmax.h>
+
+#include "PALLogSoftmax.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+LogSoftmax::LogSoftmax(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void LogSoftmax::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ if (input()->element_type() == DataType::U8)
+ {
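+    // Log-softmax outputs are always <= 0; the fixed output quantization
+    // (scale 16/256, zero point 255) maps q in [0, 255] to roughly [-16, 0].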
+ LUCI_INTERPRETER_CHECK(output()->scale() == 16. / 256);
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == 255);
+
+ tflite::SoftmaxParams params{};
+
+ params.table = _table;
+ params.beta = 1.0;
+ luci_interpreter_pal::PopulateSoftmaxLookupTable(&params, input()->scale(), params.beta);
+ }
+ output()->resize(input()->shape());
+}
+
+void LogSoftmax::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void LogSoftmax::evalFloat() const
+{
+ tflite::SoftmaxParams params{};
+ tflite::reference_ops::LogSoftmax(params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void LogSoftmax::evalQuantized() const
+{
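+  // Finish quantized parameter setup in the platform (PAL) layer and run the
+  // table-driven kernel; beta stays at 1.0, as LogSoftmax has no beta attribute.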
+ const auto input_shape = getTensorShape(input());
+ const auto output_shape = getTensorShape(output());
+ const auto input_scale = input()->scale();
+ uint8_t *output_data = getTensorData<uint8_t>(output());
+ const uint8_t *input_data = getTensorData<uint8_t>(input());
+ const float beta = 1.0;
+
+ tflite::SoftmaxParams params{};
+
+ params.table = const_cast<float *>(_table);
+ params.zero_point = output()->zero_point();
+ params.scale = output()->scale();
+
+ luci_interpreter_pal::InitializeParams(&params, input_scale, beta);
+ luci_interpreter_pal::LogSoftmax(params, input_scale, input_shape, input_data, output_shape,
+ output_data);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.h b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.h
new file mode 100644
index 000000000..18477fbe3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H
+#define LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LogSoftmax : public Kernel
+{
+public:
+ LogSoftmax(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+
+ float _table[256];
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.test.cpp
new file mode 100644
index 000000000..50dcd5c28
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogSoftmax.test.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogSoftmax.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LogSoftmaxTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogSoftmaxTest, Float)
+{
+ Shape input_shape{2, 4};
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LogSoftmax kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ -4.14297, -10.14297, -2.14297, -.142971, //
+ -7.00104, -12.00104, -.00104087, -9.00104, //
+ };
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(LogSoftmaxTest, Uint8)
+{
+ float kMin = -10;
+ float kMax = 10;
+ float kLogSoftmaxQuantizedTolerance = 16. / 256;
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(kMin, kMax);
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
+ };
+ Tensor input_tensor = makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 16. / 256, 255);
+
+ LogSoftmax kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ -4.14297, -10.14297, -2.14297, -.142971, //
+ -7.00104, -12.00104, -.00104087, -9.00104, //
+ };
+ std::vector<int32_t> ref_output_shape{2, 4};
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, kLogSoftmaxQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({189, 93, 221, 253, 142, 63, 255, 111}));
+}
+
+TEST_F(LogSoftmaxTest, InvalidInputOutputType_NEG)
+{
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 4}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 16. / 256, 255);
+
+ LogSoftmax kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LogSoftmaxTest, InvalidOutputQuantParam_NEG)
+{
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-10, 10);
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
+ };
+ Tensor input_tensor = makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 20. / 256, 255);
+
+ LogSoftmax kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.cpp
new file mode 100644
index 000000000..8e7263231
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalAnd.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+LogicalAnd::LogicalAnd(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void LogicalAnd::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void LogicalAnd::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::BOOL:
+ evalLogicalAnd();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+inline void LogicalAnd::evalLogicalAnd() const
+{
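+  // Element-wise AND with shape broadcasting: the helper walks the broadcast
+  // output shape and applies the lambda to each pair of input elements.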
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<bool>(input1()),
+ getTensorShape(input2()), getTensorData<bool>(input2()),
+ getTensorShape(output()), getTensorData<bool>(output()),
+ [](bool x, bool y) { return x && y; });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.h b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.h
new file mode 100644
index 000000000..46b889986
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOGICALAND_H
+#define LUCI_INTERPRETER_KERNELS_LOGICALAND_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LogicalAnd : public Kernel
+{
+public:
+ LogicalAnd(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ inline void evalLogicalAnd() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOGICALAND_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.test.cpp
new file mode 100644
index 000000000..21b7951e0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalAnd.test.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalAnd.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LogicalAndTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogicalAndTest, Basic)
+{
+ Shape input_shape{1, 1, 1, 4};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::BOOL>(input_shape, {true, false, false, true}, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::BOOL>(input_shape, {true, false, true, false}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalAnd kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor),
+ ::testing::ElementsAre(true, false, false, false));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
+}
+
+TEST_F(LogicalAndTest, Broadcast)
+{
+ Tensor input_tensor1 = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true},
+ _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {true}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalAnd kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor),
+ ::testing::ElementsAre(true, false, false, true));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
+}
+
+TEST_F(LogicalAndTest, MismatchInputType_NEG)
+{
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ LogicalAnd kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LogicalAndTest, InputTypeInvalid_NEG)
+{
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 1}, {0}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalAnd kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.cpp
new file mode 100644
index 000000000..65ab961aa
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalNot.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+LogicalNot::LogicalNot(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void LogicalNot::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ output()->resize(input()->shape());
+}
+
+void LogicalNot::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::BOOL:
+ evalLogicalNot();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+inline void LogicalNot::evalLogicalNot() const
+{
+ const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+ bool *output_data = getTensorData<bool>(output());
+ const bool *input_data = getTensorData<bool>(input());
+ for (int i = 0; i < size; ++i)
+ {
+ output_data[i] = !input_data[i];
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.h b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.h
new file mode 100644
index 000000000..1608fafa5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOGICALNOT_H
+#define LUCI_INTERPRETER_KERNELS_LOGICALNOT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LogicalNot : public Kernel
+{
+public:
+ LogicalNot(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ inline void evalLogicalNot() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOGICALNOT_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.test.cpp
new file mode 100644
index 000000000..3cbf27f6b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalNot.test.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalNot.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LogicalNotTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogicalNotTest, Basic)
+{
+ Shape input_shape{1, 1, 1, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::BOOL>(input_shape, {true, false, false, true}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalNot kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor),
+ ::testing::ElementsAre(false, true, true, false));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
+}
+
+TEST_F(LogicalNotTest, OutputTypeInvalid_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true},
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ LogicalNot kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LogicalNotTest, InputTypeInvalid_NEG)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalNot kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.cpp
new file mode 100644
index 000000000..f289ca64f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.cpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalOr.h"
+
+#include "kernels/Utils.h"
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+LogicalOr::LogicalOr(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void LogicalOr::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == DataType::BOOL);
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void LogicalOr::execute() const
+{
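+  // configure() already guarantees BOOL inputs, so execution dispatches
+  // straight to the broadcasting helper without a type switch.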
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<bool>(input1()),
+ getTensorShape(input2()), getTensorData<bool>(input2()),
+ getTensorShape(output()), getTensorData<bool>(output()),
+ [](bool x, bool y) { return x || y; });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.h b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.h
new file mode 100644
index 000000000..88606483f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOGICALOR_H
+#define LUCI_INTERPRETER_KERNELS_LOGICALOR_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LogicalOr : public Kernel
+{
+public:
+ LogicalOr(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOGICALOR_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.test.cpp
new file mode 100644
index 000000000..d65a69a5e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/LogicalOr.test.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LogicalOr.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class LogicalOrTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(LogicalOrTest, Basic)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true},
+ _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, true, false},
+ _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor),
+ ::testing::ElementsAre(true, false, true, true));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
+}
+
+TEST_F(LogicalOrTest, Broadcast)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::BOOL>({1, 1, 1, 4}, {true, false, false, true},
+ _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor),
+ ::testing::ElementsAre(true, false, false, true));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAre(1, 1, 1, 4));
+}
+
+TEST_F(LogicalOrTest, MismatchInputType_NEG)
+{
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::BOOL>({1, 1, 1, 1}, {false}, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(LogicalOrTest, InputTypeInvalid_NEG)
+{
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 1, 4}, {1, 0, 0, 1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1, 1, 1, 1}, {0}, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ LogicalOr kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.cpp
new file mode 100644
index 000000000..58e4f185d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Logistic.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/logistic.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Logistic::Logistic(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Logistic::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ if (input()->element_type() == DataType::U8)
+ {
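+    // Sigmoid outputs lie in (0, 1); the required output scale of 1/256 spreads
+    // that range across the full set of uint8 levels.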
+ LUCI_INTERPRETER_CHECK(output()->scale() == 1. / 256);
+ populateLookupTable();
+ }
+ output()->resize(input()->shape());
+}
+
+void Logistic::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Logistic::evalFloat() const
+{
+ tflite::reference_ops::Logistic(getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void Logistic::evalQuantized() const
+{
+ const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+ uint8_t *output_data = getTensorData<uint8_t>(output());
+ const uint8_t *input_data = getTensorData<uint8_t>(input());
+ for (int i = 0; i < size; ++i)
+ {
+ output_data[i] = getTableValue(input_data[i]);
+ }
+}
+
+void Logistic::populateLookupTable()
+{
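+  // Build a 256-entry table: for every possible uint8 input, dequantize, apply
+  // the float sigmoid, then requantize into the output domain with clamping.
+  // evalQuantized() then reduces to a per-element table lookup.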
+ const auto input_scale = static_cast<double>(input()->scale());
+ const auto input_zero_point = static_cast<int32_t>(input()->zero_point());
+ const auto output_scale = static_cast<double>(output()->scale());
+ const auto output_zero_point = static_cast<int32_t>(output()->zero_point());
+ const float inverse_scale = 1 / output_scale;
+ int32_t maxval = std::numeric_limits<uint8_t>::max();
+ int32_t minval = std::numeric_limits<uint8_t>::min();
+ for (int32_t val = minval; val <= maxval; ++val)
+ {
+ const float dequantized = input_scale * (val - input_zero_point);
+ const float transformed = 1.0f / (1.0f + std::exp(-dequantized));
+ const float rescaled = std::round(transformed * inverse_scale);
+ const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
+ setTableValue(static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)),
+ static_cast<uint8_t>(val));
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.h b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.h
new file mode 100644
index 000000000..31de6adf0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOGISTIC_H
+#define LUCI_INTERPRETER_KERNELS_LOGISTIC_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Logistic : public Kernel
+{
+public:
+ Logistic(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void populateLookupTable();
+  void setTableValue(uint8_t value, uint8_t idx) { _table[idx] = value; }
+  uint8_t getTableValue(uint8_t idx) const { return _table[idx]; }
+
+private:
+ uint8_t _table[256]{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOGISTIC_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.test.cpp
new file mode 100644
index 000000000..5a1ea669c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Logistic.test.cpp
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Logistic.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<getElementType<T>()>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(getElementType<T>());
+
+ Logistic kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data,
+ std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::pair<float, int32_t> input_quant_param =
+ quantizationParams<uint8_t>(std::min(input_data), std::max(input_data));
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, memory_manager.get());
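+  // The quantized Logistic kernel requires an output scale of exactly 1/256
+  // with zero point 0; other values are rejected in configure() (see the
+  // InvalidQuantParam_NEG test below).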
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 256, 0);
+
+ Logistic kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, output_tensor.scale() * 2));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <typename T> class LogisticTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(LogisticTest, DataTypes);
+
+TYPED_TEST(LogisticTest, Simple)
+{
+ Check<TypeParam>(
+ {89}, {89},
+ {-10.0000000000, -9.7727272727, -9.5454545455, -9.3181818182, -9.0909090909, -8.8636363636,
+ -8.6363636364, -8.4090909091, -8.1818181818, -7.9545454545, -7.7272727273, -7.5000000000,
+ -7.2727272727, -7.0454545455, -6.8181818182, -6.5909090909, -6.3636363636, -6.1363636364,
+ -5.9090909091, -5.6818181818, -5.4545454545, -5.2272727273, -5.0000000000, -4.7727272727,
+ -4.5454545455, -4.3181818182, -4.0909090909, -3.8636363636, -3.6363636364, -3.4090909091,
+ -3.1818181818, -2.9545454545, -2.7272727273, -2.5000000000, -2.2727272727, -2.0454545455,
+ -1.8181818182, -1.5909090909, -1.3636363636, -1.1363636364, -0.9090909091, -0.6818181818,
+ -0.4545454545, -0.2272727273, 0.0000000000, 0.2272727273, 0.4545454545, 0.6818181818,
+ 0.9090909091, 1.1363636364, 1.3636363636, 1.5909090909, 1.8181818182, 2.0454545455,
+ 2.2727272727, 2.5000000000, 2.7272727273, 2.9545454545, 3.1818181818, 3.4090909091,
+ 3.6363636364, 3.8636363636, 4.0909090909, 4.3181818182, 4.5454545455, 4.7727272727,
+ 5.0000000000, 5.2272727273, 5.4545454545, 5.6818181818, 5.9090909091, 6.1363636364,
+ 6.3636363636, 6.5909090909, 6.8181818182, 7.0454545455, 7.2727272727, 7.5000000000,
+ 7.7272727273, 7.9545454545, 8.1818181818, 8.4090909091, 8.6363636364, 8.8636363636,
+ 9.0909090909, 9.3181818182, 9.5454545455, 9.7727272727, 10.0000000000},
+ {0.0000453979, 0.0000569815, 0.0000715205, 0.0000897689, 0.0001126729, 0.0001414198,
+ 0.0001774998, 0.0002227827, 0.0002796147, 0.0003509396, 0.0004404502, 0.0005527786,
+ 0.0006937345, 0.0008706021, 0.0010925128, 0.0013709094, 0.0017201256, 0.0021581065,
+ 0.0027073042, 0.0033957870, 0.0042586071, 0.0053394826, 0.0066928509, 0.0083863576,
+ 0.0105038445, 0.0131488902, 0.0164489307, 0.0205599431, 0.0256715863, 0.0320125562,
+ 0.0398556989, 0.0495221198, 0.0613831074, 0.0758581800, 0.0934070047, 0.1145124805,
+ 0.1396521834, 0.1692560327, 0.2036499335, 0.2429886272, 0.2871859014, 0.3358556241,
+ 0.3882805886, 0.4434251301, 0.5000000000, 0.5565748699, 0.6117194114, 0.6641443759,
+ 0.7128140986, 0.7570113728, 0.7963500665, 0.8307439673, 0.8603478166, 0.8854875195,
+ 0.9065929953, 0.9241418200, 0.9386168926, 0.9504778802, 0.9601443011, 0.9679874438,
+ 0.9743284137, 0.9794400569, 0.9835510693, 0.9868511098, 0.9894961555, 0.9916136424,
+ 0.9933071491, 0.9946605174, 0.9957413929, 0.9966042130, 0.9972926958, 0.9978418935,
+ 0.9982798744, 0.9986290906, 0.9989074872, 0.9991293979, 0.9993062655, 0.9994472214,
+ 0.9995595498, 0.9996490604, 0.9997203853, 0.9997772173, 0.9998225002, 0.9998585802,
+ 0.9998873271, 0.9999102311, 0.9999284795, 0.9999430185, 0.9999546021});
+}
+
+TEST(LogisticTest, InvalidInputOutputType_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Shape input_shape = {1};
+ std::vector<float> input_data{10};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 256, 0);
+
+ Logistic kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(LogisticTest, InvalidQuantParam_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Shape input_shape = {2};
+ std::vector<float> input_data{-10, 10};
+ std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-10, 10);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 255, 0);
+
+ Logistic kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.cpp
new file mode 100644
index 000000000..8d9760ff2
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.cpp
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/MaxPool2D.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+#include <cmath>
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+MaxPool2D::MaxPool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
+ : KernelWithParams<Pool2DParams>({input}, {output}, params)
+{
+}
+
+void MaxPool2D::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ assert(input()->shape().num_dims() == 4);
+ const Shape &input_shape = input()->shape();
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t depth = input_shape.dim(3);
+
+ const int32_t output_height =
+ computeOutputSize(_params.padding, input_height, _params.filter_height, _params.stride_height);
+ const int32_t output_width =
+ computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width);
+
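+  // The second argument of computePadding is the dilation factor; pooling has
+  // no dilation, hence 1.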
+ _padding_height =
+ computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
+ _padding_width =
+ computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
+
+ output()->resize({batches, output_height, output_width, depth});
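+  // Max pooling only selects among existing input values, so the quantized
+  // variants must use identical quantization parameters for input and output.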
+ if (input()->element_type() == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point());
+ }
+ else if (input()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6);
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+ }
+}
+
+void MaxPool2D::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S16:
+ evalSInt16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void MaxPool2D::evalFloat() const
+{
+ float activation_min{};
+ float activation_max{};
+ calculateActivationRange(_params.activation, &activation_min, &activation_max);
+
+ tflite::PoolParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.filter_height = _params.filter_height;
+ params.filter_width = _params.filter_width;
+ params.float_activation_min = activation_min;
+ params.float_activation_max = activation_max;
+
+ tflite::reference_ops::MaxPool(params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void MaxPool2D::evalQuantized() const
+{
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::PoolParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.filter_height = _params.filter_height;
+ params.filter_width = _params.filter_width;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ tflite::reference_ops::MaxPool(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+void MaxPool2D::evalSInt16() const
+{
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::PoolParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.filter_height = _params.filter_height;
+ params.filter_width = _params.filter_width;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ tflite::reference_integer_ops::MaxPool(
+ params, getTensorShape(input()), getTensorData<int16_t>(input()), //
+ getTensorShape(output()), getTensorData<int16_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.h b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.h
new file mode 100644
index 000000000..bb7666305
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MAXPOOL2D_H
+#define LUCI_INTERPRETER_KERNELS_MAXPOOL2D_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class MaxPool2D : public KernelWithParams<Pool2DParams>
+{
+public:
+ MaxPool2D(const Tensor *input, Tensor *output, const Pool2DParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void evalSInt16() const;
+
+private:
+ int32_t _padding_height{};
+ int32_t _padding_width{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MAXPOOL2D_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp
new file mode 100644
index 000000000..44f2a222f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/MaxPool2D.test.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/MaxPool2D.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class MaxPool2DTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MaxPool2DTest, Float)
+{
+ Shape input_shape{1, 3, 5, 1};
+ std::vector<float> input_data{
+ 1, -1, 0, -2, 2, //
+ -7, -6, -5, -4, -3, //
+ 5, 4, 3, 6, 7, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ MaxPool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 1, 2, //
+ 5, 6, //
+ };
+ std::initializer_list<int32_t> ref_output_shape{1, 2, 2, 1};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MaxPool2DTest, Uint8)
+{
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375, 15.9375);
+ std::vector<float> input_data{
+ 0, -6, 12, 4, //
+ -3, -2, 10, 7, //
+ };
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ MaxPool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
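+  // The window maxima are 0 and 12; the fused RELU6 activation clamps 12 to 6.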
+ std::vector<float> ref_output_data{0.0, 6.0};
+ std::initializer_list<int32_t> ref_output_shape{1, 1, 2, 1};
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MaxPool2DTest, SInt16)
+{
+ Shape input_shape{1, 3, 5, 1};
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 1};
+ std::vector<float> input_data{
+ 1, -1, 0, -2, 2, //
+ -7, -6, -5, -4, -3, //
+ 5, 4, 3, 6, 7, //
+ };
+ std::vector<float> ref_output_data{
+ 1, 2, //
+ 5, 6, //
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, 0.2, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ MaxPool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.cpp
new file mode 100644
index 000000000..b102b5e27
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Maximum.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Maximum::Maximum(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void Maximum::configure()
+{
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Maximum::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalMaximum<float>();
+ break;
+ case DataType::U8:
+ evalMaximum<uint8_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
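+// Element-wise std::max with slow but shape-general broadcasting.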
+template <typename T> inline void Maximum::evalMaximum() const
+{
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()),
+ [](T x, T y) { return std::max(x, y); });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.h b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.h
new file mode 100644
index 000000000..3c99e69c7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MAXIMUM_H
+#define LUCI_INTERPRETER_KERNELS_MAXIMUM_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Maximum : public Kernel
+{
+public:
+ Maximum(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> inline void evalMaximum() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MAXIMUM_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.test.cpp
new file mode 100644
index 000000000..e4a505b03
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Maximum.test.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Maximum.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class MaximumTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MaximumTest, Float)
+{
+ Shape input_shape{3, 1, 2};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Maximum kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{1.0, 0.0, 1.0, 12.0, -2.0, -1.43};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(MaximumTest, Uint8)
+{
+ Shape input_shape{3, 1, 2};
+ std::vector<uint8_t> input_data1{1, 0, 2, 11, 2, 23};
+ std::vector<uint8_t> input_data2{0, 0, 1, 12, 255, 1};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::U8>(input_shape, input_data1, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::U8>(input_shape, input_data2, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Maximum kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+  std::vector<int32_t> ref_output_shape{3, 1, 2};
+  EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+              ::testing::ElementsAreArray({1, 0, 2, 12, 255, 23}));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mean.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.cpp
new file mode 100644
index 000000000..8e65e0d6d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.cpp
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Mean.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reduce.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
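+// Copies the reduction axes into tflite::MeanParams, which always holds four
+// axis slots; unused slots are padded with 1.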
+static void resolveAxes(const int32_t *axes_data, int num_axes, tflite::MeanParams *params)
+{
+ params->axis_count = num_axes;
+ for (int i = 0; i < num_axes; ++i)
+ {
+    params->axis[i] = static_cast<int16_t>(axes_data[i]);
+ }
+ for (int i = num_axes; i < 4; ++i)
+ {
+ params->axis[i] = 1;
+ }
+}
+
+// Returns the number of axes that will be reduced. Removes duplicates.
+static int getAxisReductionCount(const int32_t *axes_data, int num_axes, int input_num_dims)
+{
+ int reduction_count = num_axes;
+ for (int i = 0; i < num_axes; ++i)
+ {
+ int current = axes_data[i] >= 0 ? axes_data[i] : axes_data[i] + input_num_dims;
+ assert(current >= 0 && current < input_num_dims);
+ for (int j = 0; j < i; j++)
+ {
+ int previous = axes_data[j] >= 0 ? axes_data[j] : axes_data[j] + input_num_dims;
+ // This checks for duplicate axis
+ if (current == previous)
+ {
+ --reduction_count;
+ break;
+ }
+ }
+ }
+ return reduction_count;
+}
+
+static Shape getOutputShape(const Shape &input_shape, const int32_t *axes_data, int num_axes,
+ bool keep_dims)
+{
+ int input_num_dims = input_shape.num_dims();
+ if (input_num_dims == 0)
+ {
+ return Shape(0);
+ }
+
+ if (keep_dims)
+ {
+ Shape output_shape(input_num_dims);
+ for (int idx = 0; idx < input_num_dims; ++idx)
+ {
+ bool is_axis = false;
+ for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
+ {
+ if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
+ {
+ is_axis = true;
+ break;
+ }
+ }
+ if (is_axis)
+ {
+ output_shape.dim(idx) = 1;
+ }
+ else
+ {
+ output_shape.dim(idx) = input_shape.dim(idx);
+ }
+ }
+ return output_shape;
+ }
+ else
+ {
+ int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims);
+ Shape output_shape(input_num_dims - num_reduce_axes);
+ int num_skip_axes = 0;
+ for (int idx = 0; idx < input_num_dims; ++idx)
+ {
+ bool is_axis = false;
+ for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
+ {
+ if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
+ {
+ ++num_skip_axes;
+ is_axis = true;
+ break;
+ }
+ }
+ if (!is_axis)
+ {
+ output_shape.dim(idx - num_skip_axes) = input_shape.dim(idx);
+ }
+ }
+ return output_shape;
+ }
+}
+
+Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+ Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params)
+ : KernelWithParams<ReducerParams>({input, axes}, {output, temp_index, resolved_axes, temp_sum},
+ params)
+{
+}
+
+void Mean::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32);
+ if (input()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+ }
+
+ const Shape &input_shape = input()->shape();
+ int input_num_dims = input_shape.num_dims();
+
+ const auto *axes_data = getTensorData<int32_t>(axes());
+ int num_axes = axes()->shape().num_elements();
+ assert(num_axes <= 4);
+
+ Shape output_shape = getOutputShape(input_shape, axes_data, num_axes, _params.keep_dims);
+ output()->resize(output_shape);
+
+ tflite::MeanParams params{};
+ resolveAxes(axes_data, num_axes, &params);
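+  // The specialized 4D spatial-mean path (keep_dims, axes {1, 2}) used in the
+  // eval methods needs no scratch tensors; every other configuration does.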
+ _need_temporaries = !(
+ _params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
+ ((params.axis[0] == 1 && params.axis[1] == 2) || (params.axis[0] == 2 && params.axis[1] == 1)));
+  auto temp_index = getOutputTensors()[1];
+  auto resolved_axes = getOutputTensors()[2];
+  auto temp_sum = getOutputTensors()[3];
+
+  if (_need_temporaries)
+  {
+    temp_index->resize(Shape(input_num_dims));
+    resolved_axes->resize(Shape(num_axes));
+    temp_sum->resize(output()->shape());
+  }
+  else
+  {
+    temp_index->set_allocatable(false);
+    resolved_axes->set_allocatable(false);
+    temp_sum->set_allocatable(false);
+  }
+}
+
+void Mean::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Mean::evalFloat() const
+{
+ const Shape &input_shape = input()->shape();
+ int input_num_dims = input_shape.num_dims();
+ const auto *axes_data = getTensorData<int32_t>(axes());
+ int num_axes = axes()->shape().num_elements();
+
+ tflite::MeanParams params{};
+ resolveAxes(axes_data, num_axes, &params);
+
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+ auto temp_sum = getOutputTensors()[3];
+
+ // Defer to specialized implementation for 4D Mean across axes 1 & 2.
+ if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
+ ((params.axis[0] == 1 && params.axis[1] == 2) ||
+ (params.axis[0] == 2 && params.axis[1] == 1)))
+ {
+ tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Mean(getTensorData<float>(input()), getTensorShape(input()).DimsData(),
+ input()->shape().num_dims(), getTensorData<float>(output()),
+ getTensorShape(output()).DimsData(), output()->shape().num_dims(),
+ axes_data, num_axes, _params.keep_dims,
+ getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
+ getTensorData<float>(temp_sum));
+ }
+}
+
+void Mean::evalQuantized() const
+{
+ const Shape &input_shape = input()->shape();
+ int input_num_dims = input_shape.num_dims();
+ const auto *axes_data = getTensorData<int32_t>(axes());
+ int num_axes = axes()->shape().num_elements();
+
+ tflite::MeanParams params{};
+ resolveAxes(axes_data, num_axes, &params);
+
+ auto temp_index = getOutputTensors()[1];
+ auto resolved_axes = getOutputTensors()[2];
+ auto temp_sum = getOutputTensors()[3];
+
+ // Defer to specialized implementation for 4D Mean across axes 1 & 2.
+ if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
+ ((params.axis[0] == 1 && params.axis[1] == 2) ||
+ (params.axis[0] == 2 && params.axis[1] == 1)))
+ {
+ tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ input()->zero_point(), input()->scale(), getTensorShape(output()),
+ getTensorData<uint8_t>(output()), output()->zero_point(),
+ output()->scale());
+ }
+ else if (input()->zero_point() == output()->zero_point() && input()->scale() == output()->scale())
+ {
+ tflite::reference_ops::Mean(getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(),
+ input()->shape().num_dims(), getTensorData<uint8_t>(output()),
+ getTensorShape(output()).DimsData(), output()->shape().num_dims(),
+ axes_data, num_axes, _params.keep_dims,
+ getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
+ getTensorData<int>(temp_sum));
+ }
+ else
+ {
+ tflite::reference_ops::QuantizedMeanOrSum<>(
+ getTensorData<uint8_t>(input()), input()->zero_point(), input()->scale(),
+ getTensorShape(input()).DimsData(), input()->shape().num_dims(),
+ getTensorData<uint8_t>(output()), output()->zero_point(), output()->scale(),
+ getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
+ _params.keep_dims, getTensorData<int>(temp_index), getTensorData<int>(resolved_axes),
+ getTensorData<int>(temp_sum),
+ /*compute_sum=*/false);
+ }
+}
+
+void Mean::evalQuantizedS16() const
+{
+ const auto *input_data = getTensorData<int16_t>(input());
+ auto *output_data = getTensorData<int16_t>(output());
+
+ const Shape &input_shape = input()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const auto *axes_data = getTensorData<int32_t>(axes());
+ const int num_axes = axes()->shape().num_elements();
+
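+  // S16 tensors use a symmetric quantized range, [-32767, 32767].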
+ constexpr int32_t output_min = -std::numeric_limits<int16_t>::max();
+ constexpr int32_t output_max = std::numeric_limits<int16_t>::max();
+
+ // Defer to specialized implementation for 4D Mean across axes 1 & 2.
+ if (_params.keep_dims && input_shape.num_dims() == 4 && num_axes == 2 &&
+ ((axes_data[0] == 1 && axes_data[1] == 2) || (axes_data[0] == 2 && axes_data[1] == 1)))
+ {
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t depth = input_shape.dim(3);
+ assert(output_shape.num_dims() == 4);
+ assert(output_shape.dim(0) == batches);
+ assert(output_shape.dim(1) == 1);
+ assert(output_shape.dim(2) == 1);
+ assert(output_shape.dim(3) == depth);
+
+ const double real_multiplier =
+ static_cast<double>(input()->scale()) / static_cast<double>(output()->scale());
+
+ int32_t output_multiplier{};
+ int output_shift{};
+ quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+ const int32_t num_elements_in_axes = input_height * input_width;
+
+ for (int32_t batch = 0; batch < batches; ++batch)
+ {
+ for (int32_t c = 0; c < depth; ++c)
+ {
+ int32_t acc = 0;
+ for (int32_t in_y = 0; in_y < input_height; ++in_y)
+ {
+ for (int32_t in_x = 0; in_x < input_width; ++in_x)
+ {
+ acc += input_data[calcOffset(input_shape, batch, in_y, in_x, c)];
+ }
+ }
+ int32_t scaled_acc =
+ tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+ // Divide by the number of elements rounding to the nearest integer.
+ scaled_acc = scaled_acc > 0
+ ? (scaled_acc + num_elements_in_axes / 2) / num_elements_in_axes
+ : (scaled_acc - num_elements_in_axes / 2) / num_elements_in_axes;
+
+ scaled_acc = std::max(scaled_acc, output_min);
+ scaled_acc = std::min(scaled_acc, output_max);
+
+ output_data[calcOffset(output_shape, batch, 0, 0, c)] = scaled_acc;
+ }
+ }
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported configuration.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mean.h b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.h
new file mode 100644
index 000000000..ed07ae561
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MEAN_H
+#define LUCI_INTERPRETER_KERNELS_MEAN_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <memory>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Mean : public KernelWithParams<ReducerParams>
+{
+public:
+ Mean(const Tensor *input, const Tensor *axes, Tensor *output, Tensor *temp_index,
+ Tensor *resolved_axes, Tensor *temp_sum, const ReducerParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *axes() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void evalQuantizedS16() const;
+
+private:
+ bool _need_temporaries = false;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MEAN_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mean.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.test.cpp
new file mode 100644
index 000000000..d2c00935a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mean.test.cpp
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Mean.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class MeanTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MeanTest, FloatKeepDims)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ std::vector<int32_t> axis_data{0, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ReducerParams params{};
+ params.keep_dims = true;
+
+ Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(temp_sum);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{10.5, 12.5, 14.5};
+ std::initializer_list<int32_t> ref_output_shape{1, 3, 1};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MeanTest, FloatKeepDims4DMean)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ std::vector<int32_t> axis_data{1, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 2, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ReducerParams params{};
+ params.keep_dims = true;
+
+ Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(temp_sum);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{6, 7, 18, 19};
+ std::initializer_list<int32_t> ref_output_shape{2, 1, 1, 2};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MeanTest, FloatNotKeepDims)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ std::vector<int32_t> axis_data{1, 0, -3, -3};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({4}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ReducerParams params{};
+ params.keep_dims = false;
+
+ Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(temp_sum);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{12, 13};
+ std::initializer_list<int32_t> ref_output_shape{2};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MeanTest, Uint8KeepDims)
+{
+ float kQuantizedTolerance = getTolerance(-1.0, 1.0, 255);
+ std::vector<float> input_data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
+
+ std::vector<int32_t> axis_data{1};
+ Tensor input_tensor = makeInputTensor<DataType::U8>({3, 2}, quant_param.first, quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor temp_sum(DataType::U8, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ ReducerParams params{};
+ params.keep_dims = true;
+
+ Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(temp_sum);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.3, 0.35, 0.55};
+ std::initializer_list<int32_t> ref_output_shape{3, 1};
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MeanTest, Uint8NotKeepDims)
+{
+ float kQuantizedTolerance = getTolerance(-1.0, 1.0, 255);
+ std::vector<float> input_data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
+
+ std::vector<int32_t> axis_data{1};
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 3, 2}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ ReducerParams params{};
+ params.keep_dims = false;
+
+ Mean kernel(&input_tensor, &axis_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(temp_sum);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.4, 0.4};
+ std::initializer_list<int32_t> ref_output_shape{1, 2};
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MeanTest, SInt16KeepDims4D)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+ std::vector<int32_t> axes_data{1, 2};
+ std::vector<float> ref_output_data{6, 7, 18, 19};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({2, 2, 3, 2}, 0.25, 0, input_data, _memory_manager.get());
+ Tensor axes_tensor = makeInputTensor<DataType::S32>({2}, axes_data, _memory_manager.get());
+ Tensor temp_index(DataType::S32, Shape({}), {}, "");
+ Tensor resolved_axes(DataType::S32, Shape({}), {}, "");
+ Tensor temp_sum(DataType::FLOAT32, Shape({}), {}, "");
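+  // The output scale (0.2) deliberately differs from the input scale (0.25)
+  // to exercise the requantization path.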
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0);
+
+ ReducerParams params{};
+ params.keep_dims = true;
+
+ Mean kernel(&input_tensor, &axes_tensor, &output_tensor, &temp_index, &resolved_axes, &temp_sum,
+ params);
+ kernel.configure();
+ _memory_manager->allocate_memory(temp_index);
+ _memory_manager->allocate_memory(resolved_axes);
+ _memory_manager->allocate_memory(temp_sum);
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 1, 1, 2}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.cpp
new file mode 100644
index 000000000..5d3dcde72
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Minimum.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Minimum::Minimum(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void Minimum::configure()
+{
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Minimum::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalMinimum<float>();
+ break;
+ case DataType::U8:
+ evalMinimum<uint8_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> inline void Minimum::evalMinimum() const
+{
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()),
+ [](T x, T y) { return std::min(x, y); });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.h b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.h
new file mode 100644
index 000000000..5ff4035b4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MINIMUM_H
+#define LUCI_INTERPRETER_KERNELS_MINIMUM_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Minimum : public Kernel
+{
+public:
+ Minimum(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> inline void evalMinimum() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MINIMUM_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.test.cpp
new file mode 100644
index 000000000..9a143643f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Minimum.test.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Minimum.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class MinimumTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MinimumTest, Float)
+{
+ Shape input_shape{3, 1, 2};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Minimum kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{-1.0, 0.0, -1.0, 11.0, -3.0, -1.44};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(MinimumTest, Uint8)
+{
+ Shape input_shape{3, 1, 2};
+ std::vector<uint8_t> input_data1{1, 0, 2, 11, 2, 23};
+ std::vector<uint8_t> input_data2{0, 0, 1, 12, 255, 1};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::U8>(input_shape, input_data1, _memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::U8>(input_shape, input_data2, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Minimum kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+  std::vector<int32_t> ref_output_shape{3, 1, 2};
+  EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+              ::testing::ElementsAreArray({0, 0, 1, 11, 2, 1}));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.cpp
new file mode 100644
index 000000000..bae1eac70
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.cpp
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/MirrorPad.h"
+
+#include "kernels/Utils.h"
+
+#include <limits>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+MirrorPad::MirrorPad(const Tensor *input, const Tensor *paddings, Tensor *output,
+ const MirrorPadParams &params)
+ : KernelWithParams<MirrorPadParams>({input, paddings}, {output}, params)
+{
+}
+
+void MirrorPad::configure()
+{
+ const Shape &input_shape = input()->shape();
+ const int num_dims = input_shape.num_dims();
+
+ if (num_dims > 4)
+ throw std::runtime_error("Unsupported number of dimensions.");
+
+ assert(output()->element_type() == input()->element_type());
+ assert(paddings()->element_type() == DataType::S32);
+ // Paddings shape should be [N, 2].
+ assert(paddings()->shape().num_dims() == 2);
+ assert(paddings()->shape().dim(0) == num_dims);
+ assert(paddings()->shape().dim(1) == 2);
+
+ Shape output_shape(num_dims);
+ const auto *paddings_data = getTensorData<int32_t>(paddings());
+ for (int i = 0; i < num_dims; ++i)
+ {
+ const int32_t padding_before = paddings_data[i * 2];
+ const int32_t padding_after = paddings_data[i * 2 + 1];
+ assert(padding_before >= 0 && padding_after >= 0);
+ output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after;
+ }
+
+ output()->resize(output_shape);
+}
+
+template <typename T>
+inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode,
+ Tensor &output);
+
+void MirrorPad::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ {
+ MirrorPadImpl<float>(*input(), *paddings(), params().mode, *output());
+ break;
+ }
+ case DataType::U8:
+ {
+ assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
+ assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
+
+ MirrorPadImpl<uint8_t>(*input(), *paddings(), params().mode, *output());
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T>
+inline void MirrorPadImpl(const Tensor &input, const Tensor &paddings, MirrorPadMode mode,
+ Tensor &output)
+{
+ auto const input_dims = input.shape().num_dims();
+ auto const input_data = input.data<T>();
+ auto const paddings_data = paddings.data<int32_t>();
+ auto const output_data = output.data<T>();
+
+ auto const input_b = input_dims > 3 ? input.shape().dim(input_dims - 4) : 1;
+ auto const input_h = input_dims > 2 ? input.shape().dim(input_dims - 3) : 1;
+ auto const input_w = input_dims > 1 ? input.shape().dim(input_dims - 2) : 1;
+ auto const input_d = input.shape().dim(input_dims - 1);
+
+ auto const input_h_offset = input_d * input_w;
+ auto const input_b_offset = input_h_offset * input_h;
+
+ auto const output_b = input_dims > 3 ? output.shape().dim(input_dims - 4) : 1;
+ auto const output_h = input_dims > 2 ? output.shape().dim(input_dims - 3) : 1;
+ auto const output_w = input_dims > 1 ? output.shape().dim(input_dims - 2) : 1;
+ auto const output_d = output.shape().dim(input_dims - 1);
+
+  // Missing leading dimensions are treated as unpadded (pad 0 on both sides)
+  // so that inputs with fewer than 4 dimensions do not read out of bounds.
+  auto const left_b_pad = input_dims > 3 ? paddings_data[2 * (input_dims - 4)] : 0;
+  auto const left_h_pad = input_dims > 2 ? paddings_data[2 * (input_dims - 3)] : 0;
+  auto const left_w_pad = input_dims > 1 ? paddings_data[2 * (input_dims - 2)] : 0;
+  auto const left_d_pad = paddings_data[2 * (input_dims - 1)];
+
+  auto const right_b_pad = input_dims > 3 ? paddings_data[2 * (input_dims - 4) + 1] : 0;
+  auto const right_h_pad = input_dims > 2 ? paddings_data[2 * (input_dims - 3) + 1] : 0;
+  auto const right_w_pad = input_dims > 1 ? paddings_data[2 * (input_dims - 2) + 1] : 0;
+  auto const right_d_pad = paddings_data[2 * (input_dims - 1) + 1];
+
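+  // REFLECT maps an output coordinate onto the input by periodic tiling with
+  // period `input`; positive_mod keeps the wrapped index non-negative even
+  // when (i - left_pad) is negative.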
+ const auto positive_mod = [](auto a, auto b) { return (a % b + b) % b; };
+ const auto offset_index = [input_d, input_h_offset, input_b_offset](auto d, auto w, auto h,
+ auto b) {
+ return d + w * input_d + h * input_h_offset + b * input_b_offset;
+ };
+
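+  // SYMMETRIC also repeats the edge element: every odd period is traversed in
+  // reverse, so input [1, 2, 3] with left_pad 1 yields 1 1 2 3 3 2 1 ...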
+ const auto symmetric_dim = [&positive_mod](auto i, auto left_pad, auto input) {
+ bool reflected = (((i < left_pad ? i + 1 - input : i) - left_pad) / input & 1) == 1;
+ return positive_mod(reflected ? input + left_pad - i - 1 : i - left_pad, input);
+ };
+
+ const T *in_ptr = input_data;
+ T *out_ptr = output_data;
+
+ for (int32_t b = 0; b < output_b; ++b)
+ {
+ for (int32_t h = 0; h < output_h; ++h)
+ {
+ for (int32_t w = 0; w < output_w; ++w)
+ {
+ for (int32_t d = 0; d < output_d; ++d)
+ {
+ if (b < left_b_pad || b >= output_b - right_b_pad || //
+ h < left_h_pad || h >= output_h - right_h_pad || //
+ w < left_w_pad || w >= output_w - right_w_pad || //
+ d < left_d_pad || d >= output_d - right_d_pad)
+ {
+ if (mode == MirrorPadMode::REFLECT)
+ {
+ *out_ptr++ = input_data[offset_index(
+ positive_mod(d - left_d_pad, input_d), positive_mod(w - left_w_pad, input_w),
+ positive_mod(h - left_h_pad, input_h), positive_mod(b - left_b_pad, input_b))];
+ }
+ else
+ {
+ *out_ptr++ = input_data[offset_index(
+ symmetric_dim(d, left_d_pad, input_d), symmetric_dim(w, left_w_pad, input_w),
+ symmetric_dim(h, left_h_pad, input_h), symmetric_dim(b, left_b_pad, input_b))];
+ }
+ }
+ else
+ {
+ *out_ptr++ = *in_ptr++;
+ }
+ }
+ }
+ }
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.h b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.h
new file mode 100644
index 000000000..d3e6e858a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MIRROR_PAD_H
+#define LUCI_INTERPRETER_KERNELS_MIRROR_PAD_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class MirrorPad : public KernelWithParams<MirrorPadParams>
+{
+public:
+ MirrorPad(const Tensor *input, const Tensor *paddings, Tensor *output,
+ const MirrorPadParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *paddings() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MIRROR_PAD_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.test.cpp
new file mode 100644
index 000000000..740d8cb22
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/MirrorPad.test.cpp
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/MirrorPad.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class MirrorPadTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ void Execute(const Tensor &input, const Tensor &padding, Tensor &output, MirrorPadMode mode)
+ {
+ MirrorPadParams params{};
+ params.mode = mode;
+
+ MirrorPad kernel(&input, &padding, &output, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output);
+ kernel.execute();
+ }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MirrorPadTest, FloatReflect)
+{
+ Shape input_shape = {1, 2, 2, 1};
+ Shape padding_shape = {4, 2};
+
+ std::vector<float> input_data{1.0f, 2.0f, //
+ 3.0f, 4.0f}; //
+ std::vector<int> padding_data{0, 0, 2, 1, 1, 2, 0, 0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT);
+
+ std::vector<float> ref_output_data{2.0f, 1.0f, 2.0f, 1.0f, 2.0f, //
+ 4.0f, 3.0f, 4.0f, 3.0f, 4.0f, //
+ 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, //
+ 4.0f, 3.0f, 4.0f, 3.0f, 4.0f, //
+ 2.0f, 1.0f, 2.0f, 1.0f, 2.0f}; //
+ std::initializer_list<int32_t> ref_output_shape{1, 5, 5, 1};
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, FloatSymmetric)
+{
+ Shape input_shape = {1, 2, 2, 1};
+ Shape padding_shape = {4, 2};
+
+ std::vector<float> input_data{1.0f, 2.0f, //
+ 3.0f, 4.0f}; //
+ std::vector<int> padding_data{0, 0, 2, 1, 1, 2, 0, 0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC);
+
+ std::vector<float> ref_output_data{3.0, 3.0, 4.0, 4.0, 3.0, //
+ 1.0, 1.0, 2.0, 2.0, 1.0, //
+ 1.0, 1.0, 2.0, 2.0, 1.0, //
+ 3.0, 3.0, 4.0, 4.0, 3.0, //
+ 3.0, 3.0, 4.0, 4.0, 3.0}; //
+ std::initializer_list<int32_t> ref_output_shape{1, 5, 5, 1};
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, FloatSymmetric2Dim)
+{
+ Shape input_shape = {3, 1};
+ Shape padding_shape = {2, 2};
+
+ std::vector<float> input_data{1.0f, 2.0f, 3.0f};
+ std::vector<int> padding_data{1, 2, 0, 0};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC);
+
+ std::vector<float> ref_output_data{1.0, 1.0, 2.0, 3.0, 3.0, 2.0};
+ std::initializer_list<int32_t> ref_output_shape{6, 1};
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, Uint8Reflect)
+{
+ Shape input_shape = {1, 2, 3, 1};
+ Shape padding_shape = {4, 2};
+
+ float quant_tolerance = getTolerance(0.0f, 6.0f, 255);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(0.0f, 6.0f);
+
+ std::vector<float> input_data{1.0f, 2.0f, 3.0f, //
+ 4.0f, 5.0f, 6.0f}; //
+ std::vector<int> padding_data{0, 0, 2, 1, 1, 3, 0, 0};
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT);
+
+ std::vector<float> ref_output_data{
+ 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, //
+ 6.0f, 4.0f, 5.0f, 6.0f, 4.0f, 5.0f, 6.0f, //
+ 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, //
+ 6.0f, 4.0f, 5.0f, 6.0f, 4.0f, 5.0f, 6.0f, //
+ 3.0f, 1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f, //
+ };
+ std::initializer_list<int32_t> ref_output_shape{1, 5, 7, 1};
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, quant_tolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, Uint8Symmetric)
+{
+ Shape input_shape = {1, 2, 3, 1};
+ Shape padding_shape = {4, 2};
+
+ float quant_tolerance = getTolerance(0.0f, 6.0f, 255);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(0.0f, 6.0f);
+
+ std::vector<float> input_data{1.0f, 2.0f, 3.0f, //
+ 4.0f, 5.0f, 6.0f}; //
+ std::vector<int> padding_data{0, 0, 2, 1, 1, 3, 0, 0};
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>(padding_shape, padding_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::SYMMETRIC);
+
+ std::vector<float> ref_output_data{
+ 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, //
+ 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f, //
+ 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 2.0f, 1.0f, //
+ 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, //
+ 4.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, //
+ };
+ std::initializer_list<int32_t> ref_output_shape{1, 5, 7, 1};
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, quant_tolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(MirrorPadTest, UnsupportedDim_NEG)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 1, 1}, {1.0f}, _memory_manager.get());
+ Tensor padding_tensor =
+ makeInputTensor<DataType::S32>({5, 2}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ EXPECT_ANY_THROW(Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT));
+}
+
+TEST_F(MirrorPadTest, InvalidInputType_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor padding_tensor = makeInputTensor<DataType::S32>({1, 2}, {0, 0}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ EXPECT_ANY_THROW(Execute(input_tensor, padding_tensor, output_tensor, MirrorPadMode::REFLECT));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mul.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.cpp
new file mode 100644
index 000000000..531fb4fa1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.cpp
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Mul.h"
+
+#include "kernels/BinaryOpCommon.h"
+#include "kernels/Utils.h"
+
+#include "PALMul.h"
+
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Mul::Mul(const Tensor *input1, const Tensor *input2, Tensor *output, const MulParams &params)
+ : KernelWithParams<MulParams>({input1, input2}, {output}, params)
+{
+}
+
+void Mul::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == input1()->element_type());
+ if (input1()->element_type() == DataType::S16)
+ {
+    LUCI_INTERPRETER_CHECK(input1()->zero_points().size() == 1 &&
+                           input2()->zero_points().size() == 1);
+ LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 &&
+ output()->zero_point() == 0);
+ }
+
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Mul::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Mul::evalFloat() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<float>(params, _params.activation);
+
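+  // ProcessBroadcastShapes returns true iff broadcasting is required and, if
+  // so, records the broadcast category in `params`.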
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ luci_interpreter_pal::BroadcastMul4DSlow(
+ params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+ getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+ }
+ else
+ {
+ luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
+ getTensorShape(input2()), getTensorData<float>(input2()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ }
+}
+
+template <typename T> void Mul::evalInteger() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<T>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ luci_interpreter_pal::BroadcastMul4DSlow(
+ params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+ getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+ }
+ else
+ {
+ luci_interpreter_pal::Mul(params, getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+}
+
+void Mul::evalQuantizedS16() const
+{
+ const auto input1_scale = static_cast<double>(input1()->scale());
+ const auto input2_scale = static_cast<double>(input2()->scale());
+ const auto output_scale = static_cast<double>(output()->scale());
+
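+  // With all zero points fixed at 0, the raw product q1 * q2 carries scale
+  // input1_scale * input2_scale, so it is rescaled by s1 * s2 / s_out below.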
+ const double real_multiplier = input1_scale * input2_scale / output_scale;
+
+ int32_t output_multiplier;
+ int output_shift;
+ quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ auto fn = [output_multiplier, output_shift, activation_min, activation_max](int16_t input1_val,
+ int16_t input2_val) {
+ int32_t output = static_cast<int32_t>(input1_val) * static_cast<int32_t>(input2_val);
+ output = tflite::MultiplyByQuantizedMultiplier(output, output_multiplier, output_shift);
+ output = std::max(output, activation_min);
+ output = std::min(output, activation_max);
+ return static_cast<int16_t>(output);
+ };
+
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()),
+ getTensorShape(input2()), getTensorData<int16_t>(input2()),
+ getTensorShape(output()), getTensorData<int16_t>(output()), fn);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mul.h b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.h
new file mode 100644
index 000000000..c0cf817df
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MUL_H
+#define LUCI_INTERPRETER_KERNELS_MUL_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <cstdint>
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Mul : public KernelWithParams<MulParams>
+{
+public:
+ Mul(const Tensor *input1, const Tensor *input2, Tensor *output, const MulParams &params);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantizedS16() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MUL_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Mul.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.test.cpp
new file mode 100644
index 000000000..fc0e60614
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Mul.test.cpp
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Mul.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class MulTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(MulTest, Float)
+{
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ std::vector<std::vector<float>> test_outputs = {
+ {0.00f, 0.69f, 0.12f, 1.15f, 0.00f, 2.07f, 0.18f, 0.15f, 0.00f, 0.25f, 0.90f, 0.45f,
+ 0.16f, 0.00f, 0.00f, 0.00f, 0.80f, 0.00f, 0.24f, 0.84f, 0.00f, 1.40f, 1.20f, 2.52f,
+ 0.00f, 0.00f, 0.64f, 0.00f, 0.00f, 0.00f, 0.14f, 0.00f, 0.00f, 0.00f, 0.70f, 0.00f},
+ {0.00f, 0.69f, 0.00f, 0.25f, 0.80f, 0.00f, 0.24f, 0.84f, 0.64f, 0.00f, 0.70f, 0.00f},
+ {0.00f, 0.46f, 0.00f, 0.69f, 0.12f, 0.00f, 0.18f, 0.10f, 0.27f, 0.15f, 0.00f, 0.00f,
+ 0.16f, 0.00f, 0.24f, 0.00f, 0.00f, 0.44f, 0.60f, 1.40f, 1.20f, 2.80f, 1.08f, 2.52f,
+ 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.35f, 0.00f, 0.70f, 0.00f, 0.63f, 0.00f},
+ {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}};
+ std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
+ 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
+ std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
+ << "With shape number " << i;
+ }
+ // Re-run with exchanged inputs.
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
+ << "With shape number " << i;
+ }
+}
+
+template <loco::DataType DType> void checkInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+
+ dtype max_value = std::numeric_limits<dtype>::max();
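+  // res_max == (max_value / 10) * 10, the product of the largest operands below.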
+ dtype res_max = max_value - max_value % 10;
+
+ std::vector<std::vector<dtype>> test_outputs = {
+ {8, 0, 20, 0, 4, 30, //
+ 16, 0, 40, 3, 8, 0, //
+ 0, 0, 0, 6, 0, 0, //
+ 4, 0, 10, 9, 2, 0, //
+ 40, 0, 100, 0, 20, 150, //
+ 28, 0, 70, 0, 14, res_max},
+ {8, 0, 40, 3, 0, 0, 4, 0, 100, 0, 14, res_max},
+ {8, 12, 0, 0, 20, 30, 16, 0, 0, 0, 40, 0, 0, 0, 0, 0, 0,
+ 0, 0, 9, 2, 0, 10, 0, 0, 0, 20, 30, 100, 150, 0, 0, 14, max_value / 10 * 2,
+ 70, res_max},
+ {8, 12, 0, 0, 0, 0, 0, 9, 20, 30, 70, res_max}};
+ std::vector<dtype> input1_data{2, 3, 4, -1, -3, -2, 1, -3, 10, 15, 7, max_value / 10};
+ std::vector<dtype> input2_data{4, 0, 10, -3, 2, 10};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+ // Re-run with exchanged inputs.
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+}
+
+TEST_F(MulTest, SInt64)
+{
+ checkInteger<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(MulTest, SInt32)
+{
+ checkInteger<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(MulTest, SInt16)
+{
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ std::vector<std::vector<int32_t>> ref_output_shapes{
+ {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
+
+ std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
+ 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
+ std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ std::vector<std::vector<float>> ref_outputs = {
+ {0.00f, 0.69f, 0.12f, 1.15f, 0.00f, 2.07f, 0.18f, 0.15f, 0.00f, 0.25f, 0.90f, 0.45f,
+ 0.16f, 0.00f, 0.00f, 0.00f, 0.80f, 0.00f, 0.24f, 0.84f, 0.00f, 1.40f, 1.20f, 2.52f,
+ 0.00f, 0.00f, 0.64f, 0.00f, 0.00f, 0.00f, 0.14f, 0.00f, 0.00f, 0.00f, 0.70f, 0.00f},
+ {0.00f, 0.69f, 0.00f, 0.25f, 0.80f, 0.00f, 0.24f, 0.84f, 0.64f, 0.00f, 0.70f, 0.00f},
+ {0.00f, 0.46f, 0.00f, 0.69f, 0.12f, 0.00f, 0.18f, 0.10f, 0.27f, 0.15f, 0.00f, 0.00f,
+ 0.16f, 0.00f, 0.24f, 0.00f, 0.00f, 0.44f, 0.60f, 1.40f, 1.20f, 2.80f, 1.08f, 2.52f,
+ 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.35f, 0.00f, 0.70f, 0.00f, 0.63f, 0.00f},
+ {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data,
+ _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0,
+ input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 4.0 / 32767, 0);
+ const float tolerance = output_tensor.scale() * 2;
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor),
+ ::testing::ElementsAreArray(ref_output_shapes[i]))
+ << "With shape number " << i;
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance))
+ << "With shape number " << i;
+ }
+ // Re-run with exchanged inputs and different scales.
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0,
+ input2_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data,
+ _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 3.0 / 32767, 0);
+ const float tolerance = output_tensor.scale() * 2;
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor),
+ ::testing::ElementsAreArray(ref_output_shapes[i]))
+ << "With shape number " << i;
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance))
+ << "With shape number " << i;
+ }
+}
+
+TEST_F(MulTest, Input_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(MulTest, Invalid_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(MulTest, Invalid_Input_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U64);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST_F(MulTest, Invalid_Quantization_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S16>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S16>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16);
+
+ MulParams params{};
+ params.activation = Activation::NONE;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Neg.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.cpp
new file mode 100644
index 000000000..c6fe08a9e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Neg.h"
+#include "kernels/Utils.h"
+
+#include "PALNeg.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Neg::Neg(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Neg::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+ output()->resize(input()->shape());
+}
+
+void Neg::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Neg::evalFloat() const
+{
+ luci_interpreter_pal::Negate(getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Neg.h b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.h
new file mode 100644
index 000000000..69fa1a18e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_NEG_H
+#define LUCI_INTERPRETER_KERNELS_NEG_H
+
+#include "core/Kernel.h"
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Neg : public Kernel
+{
+public:
+ Neg(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_NEG_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Neg.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.test.cpp
new file mode 100644
index 000000000..8b2bc1a82
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Neg.test.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Neg.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<T> input_data, std::initializer_list<T> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ Neg kernel(&input_tensor, &output_tensor);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(NegTest, FloatSimple)
+{
+ Check<float>(/*input_shape=*/{2, 3},
+ /*output_shape=*/{2, 3},
+ /*input_data=*/
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
+ },
+ /*output_data=*/
+ {
+ 0.0f, -1.0f, -3.0f, // Row 1
+ -1.0f, 1.0f, 2.0f, // Row 2
+ });
+
+ SUCCEED();
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.cpp
new file mode 100644
index 000000000..54e5eee34
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/NotEqual.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/comparisons.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+NotEqual::NotEqual(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {}
+
+void NotEqual::configure()
+{
+ LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL);
+
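+  // Pre-compute fixed-point multipliers so that inputs quantized with
+  // different scales can be compared at execution time.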
+ if (x()->element_type() == DataType::U8)
+ {
+ quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift);
+ quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift);
+ }
+ output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape()));
+}
+
+void NotEqual::execute() const
+{
+ switch (x()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void NotEqual::evalFloat() const
+{
+ const auto x_data = getTensorData<float>(x());
+ const auto y_data = getTensorData<float>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowNotEqual(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::NotEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()),
+ y_data, getTensorShape(output()), output_data);
+ }
+}
+
+template <typename T> void NotEqual::evalInteger() const
+{
+ const auto x_data = getTensorData<T>(x());
+ const auto y_data = getTensorData<T>(y());
+ auto output_data = getTensorData<bool>(output());
+
+ tflite::ComparisonParams op_params;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowNotEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+ else
+ {
+ tflite::reference_ops::NotEqualNoScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+}
+
+void NotEqual::evalQuantized() const
+{
+ const auto x_data = getTensorData<uint8_t>(x());
+ const auto y_data = getTensorData<uint8_t>(y());
+ auto output_data = getTensorData<bool>(output());
+
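+  // Each input is offset by its zero point and rescaled by its own
+  // multiplier/shift pair so both operands land in a common fixed-point
+  // domain; left_shift adds headroom before the rescaling.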
+ tflite::ComparisonParams op_params;
+ op_params.left_shift = 8;
+ op_params.input1_offset = -x()->zero_point(); // Note the '-'
+ op_params.input1_shift = _x_shift;
+ op_params.input1_multiplier = _x_multiplier;
+ op_params.input2_offset = -y()->zero_point(); // Note the '-'
+ op_params.input2_shift = _y_shift;
+ op_params.input2_multiplier = _y_multiplier;
+ op_params.is_broadcast = x()->shape() != y()->shape();
+
+ if (op_params.is_broadcast)
+ {
+ tflite::reference_ops::Broadcast4DSlowNotEqualWithScaling(
+ op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()),
+ output_data);
+ }
+ else
+ {
+ tflite::reference_ops::NotEqualWithScaling(op_params, getTensorShape(x()), x_data,
+ getTensorShape(y()), y_data,
+ getTensorShape(output()), output_data);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.h b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.h
new file mode 100644
index 000000000..d2aafe893
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_NOT_EQUAL_H
+#define LUCI_INTERPRETER_KERNELS_NOT_EQUAL_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class NotEqual : public Kernel
+{
+public:
+ NotEqual(const Tensor *x, const Tensor *y, Tensor *output);
+
+ const Tensor *x() const { return _inputs[0]; }
+ const Tensor *y() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+
+private:
+ int32_t _x_multiplier = 0;
+ int _x_shift = 0;
+ int32_t _y_multiplier = 0;
+ int _y_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_NOT_EQUAL_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.test.cpp
new file mode 100644
index 000000000..45bf4022a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/NotEqual.test.cpp
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/NotEqual.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class NotEqualTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(NotEqualTest, FloatSimple)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ -1, 0, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, true, // Row 1
+ true, false, true, // Row 2
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(NotEqualTest, FloatBroadcast)
+{
+ std::vector<float> x_data{
+ 0.5, 0.7, 0.9, // Row 1
+ 1, 0, -1, // Row 2
+ -1, 0, 1, // Row 3
+ 0.9, 0.7, 0.5, // Row 4
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.7, 0.5, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, true, // Row 1
+ true, true, true, // Row 2
+ true, true, true, // Row 3
+ false, false, false, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerSimple(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{min_value, 2, max_value};
+
+ std::vector<dtype> y_data{min_value, -2, max_value};
+
+ std::vector<bool> ref_output_data{false, true, false};
+
+ Tensor x_tensor = makeInputTensor<DType>({3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3}));
+}
+
+template <loco::DataType DType>
+void checkIntegerBroadcast(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ dtype min_value = std::numeric_limits<dtype>::min();
+ dtype max_value = std::numeric_limits<dtype>::max();
+ std::vector<dtype> x_data{
+ min_value, 2, 3, // Row 1
+ 4, 5, max_value, // Row 2
+ -1, -2, -3, // Row 3
+ min_value, -2, max_value, // Row 4
+ };
+
+ std::vector<dtype> y_data{
+ min_value, -2, max_value, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ false, true, true, // Row 1
+ true, true, false, // Row 2
+ true, false, true, // Row 3
+ false, false, false, // Row 4
+ };
+
+ Tensor x_tensor = makeInputTensor<DType>({4, 3}, x_data, memory_manager);
+ Tensor y_tensor = makeInputTensor<DType>({3}, y_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3}));
+}
+
+TEST_F(NotEqualTest, Int32)
+{
+ checkIntegerSimple<loco::DataType::S32>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(NotEqualTest, Int64)
+{
+ checkIntegerSimple<loco::DataType::S64>(_memory_manager.get());
+ checkIntegerBroadcast<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+const float F_MIN = -128.0 / 128.0;
+const float F_MAX = 127.0 / 128.0;
+
+TEST_F(NotEqualTest, Uint8Quantized)
+{
+ std::vector<float> x_data{
+ 0.5, 0.5, 0.7, 0.9, // Row 1
+ 1, 0, 0.05, -1, // Row 2
+ };
+
+ std::vector<float> y_data{
+ 0.9, 0.5, 0.55, 0.5, // Row 1
+ -1, 0, 0.05, 1, // Row 2
+ };
+
+ std::vector<bool> ref_output_data{
+ true, false, true, true, // Row 1
+ true, false, false, true, // Row 2
+ };
+
+ std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, x_quant_param.first, x_quant_param.second, x_data, _memory_manager.get());
+
+ std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 2);
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, y_quant_param.first, y_quant_param.second, y_data, _memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(NotEqualTest, Uint8QuantizedBroadcast)
+{
+ std::vector<float> x_data{
+ 0.4, -0.8, 0.7, 0.3, // Row 1
+ -0.5, 0.1, 0, 0.5, // Row 2
+ 1, 0, 0.05, -1, // Row 3
+ -1, 0.05, 0, 1, // Row 4
+ };
+
+ std::vector<float> y_data{
+ -1, 0.05, 0, 1, // Row 1
+ };
+
+ std::vector<bool> ref_output_data{
+ true, true, true, true, // Row 1
+ true, true, false, true, // Row 2
+ true, true, true, true, // Row 3
+ false, false, false, false, // Row 4
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX);
+ Tensor x_tensor = makeInputTensor<DataType::U8>(
+ {1, 4, 4, 1}, quant_param.first, quant_param.second, x_data, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 4, 1}, quant_param.first, quant_param.second, y_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 4, 1}));
+ EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data));
+}
+
+TEST_F(NotEqualTest, Input_Type_Mismatch_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(NotEqualTest, Input_Output_Type_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(NotEqualTest, Float_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2}, {1.f, 2.f}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({3}, {1.f, 2.f, 3.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(NotEqualTest, Int32_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S32>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S32>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(NotEqualTest, Int64_Broadcast_NEG)
+{
+ Tensor x_tensor = makeInputTensor<DataType::S64>({2}, {1, 2}, _memory_manager.get());
+ Tensor y_tensor = makeInputTensor<DataType::S64>({3}, {1, 2, 3}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ NotEqual kernel(&x_tensor, &y_tensor, &output_tensor);
+ ASSERT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.cpp
new file mode 100644
index 000000000..4d3e5f2ef
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.cpp
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/OneHot.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+
+template <typename T>
+void OneHotComputeImpl(const Tensor *indices_tensor, const Tensor *on_value_tensor,
+ const Tensor *off_value_tensor, int32_t depth, int32_t axis,
+ Tensor *output_tensor)
+{
+ // define input shape and correct axis
+ auto const &input_shape = indices_tensor->shape();
+ axis = axis == -1 ? input_shape.num_dims() : axis;
+
+ // TODO support other integer input types
+ auto const *indices = getTensorData<int32_t>(indices_tensor);
+ auto const on_value = getTensorData<T>(on_value_tensor)[0];
+ auto const off_value = getTensorData<T>(off_value_tensor)[0];
+ auto *output = getTensorData<T>(output_tensor);
+
+ // prefix_dim_size == # of elements before the axis
+ // depth == # of elements per axis
+ // suffix_dim_size == # of elements after the axis
+ auto prefix_dim_size = 1;
+ for (int32_t i = 0; i < axis; ++i)
+ {
+ prefix_dim_size *= input_shape.dim(i);
+ }
+ assert(prefix_dim_size > 0);
+ auto const suffix_dim_size = input_shape.num_elements() / prefix_dim_size;
+
+ // View the indices as a matrix of size:
+ // prefix_dim_size x suffix_dim_size
+ // View the output as a matrix of size:
+ // prefix_dim_size x depth x suffix_dim_size
+ // Then the output is:
+ // output(i, j, k) == (indices(i, k) == j) ? on : off
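+  //   e.g. indices == [0, 2], depth == 3, axis == -1:
+  //   output == [[on, off, off], [off, off, on]]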
+ for (int32_t i = 0; i < prefix_dim_size; ++i)
+ for (int32_t j = 0; j < depth; ++j)
+ for (int32_t k = 0; k < suffix_dim_size; ++k, ++output)
+ *output = indices[i * suffix_dim_size + k] == j ? on_value : off_value;
+}
+
+} // namespace
+
+OneHot::OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value,
+ const Tensor *off_value, Tensor *output, const OneHotParams &params)
+ : KernelWithParams<OneHotParams>({indices, depth, on_value, off_value}, {output}, params)
+{
+ // Do nothing
+}
+
+void OneHot::configure()
+{
+ // check types
+ LUCI_INTERPRETER_CHECK(indices()->element_type() == DataType::S32);
+ LUCI_INTERPRETER_CHECK(depth()->element_type() == DataType::S32);
+ LUCI_INTERPRETER_CHECK(on_value()->element_type() == off_value()->element_type());
+ LUCI_INTERPRETER_CHECK(output()->element_type() == on_value()->element_type());
+
+  // check shape-dependent parameters
+ LUCI_INTERPRETER_CHECK(on_value()->shape().num_elements() == 1);
+ LUCI_INTERPRETER_CHECK(off_value()->shape().num_elements() == 1);
+ LUCI_INTERPRETER_CHECK(depth()->shape().num_elements() == 1);
+ LUCI_INTERPRETER_CHECK(params().axis >= -1 && params().axis <= indices()->shape().num_dims());
+
+ // define parameters that affect the output shape
+ auto const depth_value = getTensorData<int32_t>(depth())[0];
+ auto const &input_shape = indices()->shape();
+ auto const input_dims = input_shape.num_dims();
+ auto const axis = params().axis == -1 ? input_dims : params().axis;
+
+ // define output shape
+ Shape output_shape(input_shape.num_dims() + 1);
+ {
+ for (int32_t d = 0; d < axis; ++d)
+ output_shape.dim(d) = input_shape.dim(d);
+
+ output_shape.dim(axis) = depth_value;
+
+ for (int32_t d = axis + 1; d < output_shape.num_dims(); ++d)
+ output_shape.dim(d) = input_shape.dim(d - 1);
+ }
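+  // Example: input shape {2, 3} with axis = 1 and depth = 4 yields the output
+  // shape {2, 4, 3} (see the BasicPattern test).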
+
+ // reshape output
+ output()->resize(output_shape);
+}
+
+void OneHot::execute() const
+{
+ auto const depth_value = getTensorData<int32_t>(depth())[0];
+ auto const axis = params().axis;
+
+ switch (output()->element_type())
+ {
+ case loco::DataType::FLOAT32:
+ OneHotComputeImpl<float>(indices(), on_value(), off_value(), depth_value, axis, output());
+ break;
+ case loco::DataType::U8:
+ OneHotComputeImpl<uint8_t>(indices(), on_value(), off_value(), depth_value, axis, output());
+ break;
+ case loco::DataType::S16:
+ OneHotComputeImpl<int16_t>(indices(), on_value(), off_value(), depth_value, axis, output());
+ break;
+ default:
+ // TODO Support other data types
+ throw std::runtime_error("Not supported, yet!");
+ break;
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.h b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.h
new file mode 100644
index 000000000..572f857ae
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_ONEHOT_H
+#define LUCI_INTERPRETER_KERNELS_ONEHOT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class OneHot : public KernelWithParams<OneHotParams>
+{
+public:
+ OneHot(const Tensor *indices, const Tensor *depth, const Tensor *on_value,
+ const Tensor *off_value, Tensor *output, const OneHotParams &params);
+
+ const Tensor *indices() const { return _inputs[0]; }
+ const Tensor *depth() const { return _inputs[1]; }
+ const Tensor *on_value() const { return _inputs[2]; }
+ const Tensor *off_value() const { return _inputs[3]; }
+
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_ONEHOT_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.test.cpp
new file mode 100644
index 000000000..45b6968fa
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/OneHot.test.cpp
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/OneHot.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T1, typename T2>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<T1> input_data, std::initializer_list<int32_t> depth_data,
+ std::initializer_list<T2> on_value_data, std::initializer_list<T2> off_value_data,
+ int32_t axis, std::initializer_list<T2> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ constexpr auto input_type = getElementType<T1>();
+ constexpr auto output_type = getElementType<T2>();
+
+ Tensor input_tensor = makeInputTensor<input_type>(input_shape, input_data, memory_manager.get());
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, depth_data, memory_manager.get());
+ Tensor on_value_tensor = makeInputTensor<output_type>({}, on_value_data, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<output_type>({}, off_value_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(output_type);
+
+ OneHotParams params{};
+ params.axis = axis;
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data));
+}
+
+template <typename T> class OneHotTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int16_t>;
+TYPED_TEST_SUITE(OneHotTest, DataTypes);
+
+TYPED_TEST(OneHotTest, BasicPattern)
+{
+ // axis 0
+ Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{4, 2, 3},
+ /*input_data=*/
+ {
+ 0, 3, 5, //
+ 7, 3, 0, //
+ },
+ /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
+ /*axis=*/0,
+ /*output_data=*/
+ {
+ 1, 0, 0, //
+ 0, 0, 1, //
+
+ 0, 0, 0, //
+ 0, 0, 0, //
+
+ 0, 0, 0, //
+ 0, 0, 0, //
+
+ 0, 1, 0, //
+ 0, 1, 0, //
+ });
+ // axis 1
+ Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 4, 3},
+ /*input_data=*/
+ {
+ 0, 3, 5, //
+ 7, 3, 0, //
+ },
+ /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
+ /*axis=*/1,
+ /*output_data=*/
+ {
+ 1, 0, 0, //
+ 0, 0, 0, //
+ 0, 0, 0, //
+ 0, 1, 0, //
+
+ 0, 0, 1, //
+ 0, 0, 0, //
+ 0, 0, 0, //
+ 0, 1, 0, //
+ });
+ // axis -1
+ Check<int32_t, TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3, 4},
+ /*input_data=*/
+ {
+ 0, 3, 5, //
+ 7, 3, 0, //
+ },
+ /*depth_data=*/{4}, /*on_value_data=*/{1}, /*off_value_data=*/{0},
+ /*axis=*/-1,
+ /*output_data=*/
+ {
+ 1, 0, 0, 0, //
+ 0, 0, 0, 1, //
+ 0, 0, 0, 0, //
+
+ 0, 0, 0, 0, //
+ 0, 0, 0, 1, //
+ 1, 0, 0, 0, //
+ });
+}
+
+TEST(OneHotTest, UnsupportedInputType_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+  // input type should be an integer type (only S32 is currently supported)
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {0}, memory_manager.get());
+
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());
+ Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ OneHotParams params = {-1};
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(OneHotTest, OutputTypeMismatch_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::S32>({1}, {0}, memory_manager.get());
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());
+
+  // the types of on_value, off_value and output_tensor should be the same
+ Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16);
+
+ OneHotParams params = {-1};
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(OneHotTest, InvalidAxis_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::S32>({1}, {0}, memory_manager.get());
+ Tensor depth_tensor = makeInputTensor<DataType::S32>({}, {1}, memory_manager.get());
+ Tensor on_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {1.0}, memory_manager.get());
+ Tensor off_value_tensor = makeInputTensor<DataType::FLOAT32>({}, {0.0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ // axis should be in [-1, input_shape.rank]
+ OneHotParams params = {-2};
+
+ OneHot kernel(&input_tensor, &depth_tensor, &on_value_tensor, &off_value_tensor, &output_tensor,
+ params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.cpp
new file mode 100644
index 000000000..5a6b05c3a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.cpp
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/PRelu.h"
+
+#include "kernels/BinaryOpCommon.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/binary_function.h>
+#include <tensorflow/lite/kernels/internal/reference/prelu.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+PRelu::PRelu(const Tensor *input, const Tensor *alpha, Tensor *output)
+ : Kernel({input, alpha}, {output})
+{
+}
+
+PRelu::~PRelu()
+{
+  // The out-of-line destructor is needed so that the vector of quantized alpha
+  // data is destroyed properly (its element type is forward-declared in the header)
+}
+
+void PRelu::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(alpha()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(input()->scales().size() <= 1);
+ LUCI_INTERPRETER_CHECK(output()->scales().size() <= 1);
+
+ if (input()->element_type() == DataType::U8)
+ {
+ LUCI_INTERPRETER_CHECK(alpha()->scales().size() <= 1); // remove when CWQ kernel arrives
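+    // Sketch of the quantized arithmetic (ignoring zero-points for brevity):
+    //   x >= 0: q_out ~= q_in * (s_in / s_out)                     -> identity multiplier
+    //   x <  0: q_out ~= q_in * q_alpha * (s_in * s_alpha / s_out) -> alpha multiplier
+    // quantizeMultiplier() encodes each real-valued factor as a fixed-point
+    // multiplier plus a power-of-two shift.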
+ _alpha_multipliers.resize(1);
+ double alpha_multiplier = input()->scale() * alpha()->scale() / output()->scale();
+ quantizeMultiplier(alpha_multiplier, &_alpha_multipliers[0].multiplier,
+ &_alpha_multipliers[0].shift);
+ double identity_multiplier = input()->scale() / output()->scale();
+ quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
+ }
+ else if (input()->element_type() == DataType::S16)
+ {
+ // Common check for correctness of quant params
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+ for (size_t channel = 0; channel < alpha()->zero_points().size(); ++channel)
+ {
+ LUCI_INTERPRETER_CHECK(alpha()->zero_points()[channel] == 0);
+ }
+ // PRelu specific checks for CWQ
+ LUCI_INTERPRETER_CHECK(alpha()->quantized_dimension() == alpha()->shape().num_dims() - 1);
+ LUCI_INTERPRETER_CHECK(static_cast<int32_t>(alpha()->scales().size()) ==
+ alpha()->shape().dim(alpha()->quantized_dimension()));
+ LUCI_INTERPRETER_CHECK(alpha()->shape().num_elements() ==
+ input()->shape().dim(input()->shape().num_dims() - 1));
+
+    // all dimensions of alpha except the last one should be of size 1
+ for (int dim = 0; dim < alpha()->shape().num_dims() - 1; ++dim)
+ {
+ LUCI_INTERPRETER_CHECK(alpha()->shape().dim(dim) == 1);
+ }
+
+ std::vector<double> real_multipliers =
+ getQuantizedConvolutionMultiplers(input()->scale(), alpha()->scales(), output()->scale());
+
+ _alpha_multipliers = quantizeMultipliers(real_multipliers);
+
+ double identity_multiplier = input()->scale() / output()->scale();
+ quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity);
+ }
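+  // The output shape is the broadcast of the input and alpha shapes, e.g.
+  // input {1, 2, 2, 3} with alpha {1, 1, 3} broadcasts to {1, 2, 2, 3}
+  // (see the FloatBroadcast test).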
+ output()->resize(calculateShapeForBroadcast(input()->shape(), alpha()->shape()));
+}
+
+void PRelu::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void PRelu::evalFloat() const
+{
+ const auto input_data = getTensorData<float>(input());
+ const auto alpha_data = getTensorData<float>(alpha());
+ const auto size = getTensorShape(input()).FlatSize();
+ auto output_data = getTensorData<float>(output());
+
+ auto PReluFunc = [](float input, float alpha) { return input >= 0.0 ? input : input * alpha; };
+
+ if (input()->shape() != alpha()->shape())
+ {
+ tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>(
+ getTensorShape(input()), getTensorData<float>(input()), getTensorShape(alpha()),
+ getTensorData<float>(alpha()), getTensorShape(output()), getTensorData<float>(output()),
+ PReluFunc);
+ }
+ else
+ {
+ for (auto i = decltype(size){0}; i < size; ++i)
+ {
+ if (input_data[i] >= 0)
+ output_data[i] = input_data[i];
+ else
+ output_data[i] = input_data[i] * alpha_data[i];
+ }
+ }
+}
+
+void PRelu::evalQuantized() const
+{
+ tflite::PreluParams op_params{};
+
+ op_params.input_offset = -input()->zero_point(); // Note the '-'.
+ op_params.alpha_offset = -alpha()->zero_point(); // Note the '-'.
+ op_params.output_offset = output()->zero_point();
+ op_params.output_shift_1 = _output_shift_identity;
+ op_params.output_multiplier_1 = _output_multiplier_identity;
+ op_params.output_shift_2 = _alpha_multipliers[0].shift;
+ op_params.output_multiplier_2 = _alpha_multipliers[0].multiplier;
+
+ if (input()->shape() != alpha()->shape())
+ {
+ tflite::reference_ops::BroadcastPrelu4DSlow(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(alpha()),
+ getTensorData<uint8_t>(alpha()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Prelu<uint8_t>(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(alpha()),
+ getTensorData<uint8_t>(alpha()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+}
+
+static inline int16_t evalElemS16PRelu(int16_t input_val, int16_t alpha_val,
+ const ChannelQuantMultipliers &identity_mult,
+ const ChannelQuantMultipliers &alpha_mult)
+{
+ constexpr int32_t quantized_min = std::numeric_limits<int16_t>::min();
+ constexpr int32_t quantized_max = std::numeric_limits<int16_t>::max();
+
+ const int32_t output_val =
+ input_val >= 0
+ ? tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val),
+ identity_mult.multiplier, identity_mult.shift)
+ : tflite::MultiplyByQuantizedMultiplier(static_cast<int32_t>(input_val * alpha_val),
+ alpha_mult.multiplier, alpha_mult.shift);
+ const int32_t clamped_output = std::min(quantized_max, std::max(quantized_min, output_val));
+ return clamped_output;
+}
+
+void PRelu::evalQuantizedS16() const
+{
+ // Note that this kernel assumes alpha is CWQ
+ tflite::RuntimeShape input_shape = getTensorShape(input());
+ const int16_t *input_data = input()->data<int16_t>();
+ const int16_t *alpha_data = alpha()->data<int16_t>();
+ int16_t *output_data = output()->data<int16_t>();
+
+ const ChannelQuantMultipliers pos_mult{_output_shift_identity, _output_multiplier_identity};
+
+ const int last_dim = input()->shape().num_dims() - 1;
+
+ int32_t outer_dims_size = 1;
+ for (int i = 0; i < last_dim; ++i)
+ outer_dims_size *= input_shape.Dims(i);
+ int32_t quant_dim_size = input_shape.Dims(last_dim);
+
+ for (int32_t outer_dims = 0; outer_dims < outer_dims_size; ++outer_dims)
+ for (int32_t quant_channel = 0; quant_channel < quant_dim_size; ++quant_channel)
+ {
+ const ChannelQuantMultipliers &neg_mult = _alpha_multipliers[quant_channel];
+ size_t offset = static_cast<size_t>(outer_dims) * static_cast<size_t>(quant_dim_size);
+ offset += quant_channel;
+
+ output_data[offset] =
+ evalElemS16PRelu(input_data[offset], alpha_data[quant_channel], pos_mult, neg_mult);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.h b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.h
new file mode 100644
index 000000000..f7735d418
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_PRELU_H
+#define LUCI_INTERPRETER_KERNELS_PRELU_H
+
+#include "core/Kernel.h"
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ChannelQuantMultipliers;
+
+class PRelu : public Kernel
+{
+public:
+ PRelu(const Tensor *input, const Tensor *alpha, Tensor *output);
+
+ ~PRelu();
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *alpha() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void evalQuantizedS16() const;
+
+private:
+ std::vector<ChannelQuantMultipliers> _alpha_multipliers;
+ // TODO merge this into one ChannelQuantMultiplier object
+ int32_t _output_multiplier_identity = 0;
+ int _output_shift_identity = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_PRELU_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.test.cpp
new file mode 100644
index 000000000..6d97382de
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/PRelu.test.cpp
@@ -0,0 +1,397 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/PRelu.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> alpha_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data,
+ std::initializer_list<T> alpha_data, std::initializer_list<T> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor alpha_tensor =
+ makeInputTensor<element_type>(alpha_shape, alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(PReluTest, FloatSimple)
+{
+ Check<float>(/*input_shape=*/{2, 3}, /*alpha_shape=*/{2, 3},
+ /*output_shape=*/{2, 3},
+ /*input_data=*/
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
+ },
+ /*alpha_data=*/
+ {
+ 0.0f, 0.5f, 0.1f, // Row 1
+ 0.0f, 0.5f, 0.1f, // Row 2
+ },
+ /*output_data=*/
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -0.5f, -0.2f, // Row 2
+ });
+
+ SUCCEED();
+}
+
+TEST(PReluTest, FloatBroadcast)
+{
+ Check<float>(/*input_shape=*/{1, 2, 2, 3}, /*alpha_shape=*/{1, 1, 3},
+ /*output_shape=*/{1, 2, 2, 3},
+ /*input_data=*/
+ {
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 1.0f, 1.0f, 1.0f, // Row 1, Column 2
+ -1.0f, -1.0f, -1.0f, // Row 2, Column 1
+ -2.0f, -2.0f, -2.0f, // Row 2, Column 2
+ },
+ /*alpha_data=*/
+ {0.0f, 1.0f, 2.0f},
+ /*output_data=*/
+ {
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 1.0f, 1.0f, 1.0f, // Row 1, Column 2
+ 0.0f, -1.0f, -2.0f, // Row 2, Column 1
+ 0.0f, -2.0f, -4.0f, // Row 2, Column 2
+ });
+
+ SUCCEED();
+}
+
+float GetTolerance(float min, float max) { return (max - min) / 255.0; }
+
+TEST(PReluTest, Uint8Simple)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data{-0.8f, 0.2f, 0.9f, 0.7f, 0.1f, -0.4f};
+ std::vector<float> alpha_data{0.5f, 0.5f, 0.5f, 0.25f, 1.0f, 0.25f};
+ std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, 0.7f, 0.1f, -0.1f};
+
+ float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 3, 1}, quant_param.first, quant_param.second, alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 3, 1}));
+
+ SUCCEED();
+}
+
+TEST(PReluTest, Uint8Broadcast)
+{
+ std::vector<float> input_data{
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 0.5f, 0.5f, 0.5f, // Row 1, Column 2
+ -1.0f, -1.0f, -1.0f, // Row 2, Column 1
+ -0.25f, -0.25f, -0.25f, // Row 2, Column 2
+ };
+ std::vector<float> alpha_data{0.0f, 0.5f, -0.5f};
+ std::vector<float> ref_output_data{
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 0.5f, 0.5f, 0.5f, // Row 1, Column 2
+ 0.0f, -0.5f, 0.5f, // Row 2, Column 1
+ 0.0f, -0.125f, 0.125f // Row 2, Column 2
+ };
+ std::vector<float> ref_quant_output_data{
+ 128, 128, 128, // Row 1, Column 1
+ 192, 192, 192, // Row 1, Column 2
+ 128, 64, 192, // Row 2, Column 1
+ 128, 112, 144 // Row 2, Column 2
+ };
+ float kQuantizedTolerance = 2 * (1. / 256);
+ const float kMin = -1;
+ const float kMax = 127.f / 128.f;
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(kMin, kMax);
+
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 2, 3}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 3}, quant_param.first, quant_param.second, alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 3}));
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_quant_output_data));
+}
+
+TEST(PReluTest, SInt16_LWQ_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+  // Rewrite this test if layer-wise quantization for sint16 becomes supported
+ std::vector<float> input_data(6); // data is not important
+ std::vector<float> alpha_data(6);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, input_data, memory_manager.get());
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PReluTest, SInt16_CWQ_Simple)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data{-0.8f, 0.2f, 0.9f, -0.7f, 0.1f, -0.4f};
+ std::vector<float> alpha_data{0.5f, 0.25f};
+ std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, -0.175f, 0.1f, -0.1f};
+
+ std::vector<float> alpha_scales{0.05f, 0.025f};
+ std::vector<int32_t> zerop{0, 0};
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::S16>({2}, alpha_scales, zerop, 0, alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.025, 0);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 3, 2}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(PReluTest, SInt16_CWQ_spatial_alpha_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data(6); // data is not important
+ std::vector<float> alpha_data(6);
+
+ std::vector<float> alpha_scales{0.25f, 0.05f};
+ std::vector<int32_t> zerop{0, 0};
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 3, 2}, alpha_scales, zerop, 3,
+ alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PReluTest, SInt16_CWQ_wrong_dim_quant_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data(6); // data is not important
+ std::vector<float> alpha_data(6);
+
+ std::vector<float> alpha_scales{0.25f};
+ std::vector<int32_t> zerop{0};
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 1, 2}, alpha_scales, zerop, 1,
+ alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PReluTest, SInt16_CWQ_uneven_shape1)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data{-0.8f, 0.2f, 0.9f, -0.7f, 0.1f, -0.4f};
+ std::vector<float> alpha_data{0.5f, 0.25f};
+ std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, -0.175f, 0.1f, -0.1f};
+
+ std::vector<float> alpha_scales{0.05f, 0.025f};
+ std::vector<int32_t> zerop{0, 0};
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 3, 2}, 0.1, 0, input_data, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 2}, alpha_scales, zerop, 2,
+ alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.025, 0);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 3, 2}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(PReluTest, SInt16_CWQ_uneven_shape2)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data{
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 0.5f, 0.5f, 0.5f, // Row 1, Column 2
+ -1.0f, -1.0f, -1.0f, // Row 2, Column 1
+ -0.25f, -0.25f, -0.25f, // Row 2, Column 2
+ };
+ std::vector<float> alpha_data{0.0f, 0.5f, -0.5f};
+ std::vector<float> ref_output_data{
+ 0.0f, 0.0f, 0.0f, // Row 1, Column 1
+ 0.5f, 0.5f, 0.5f, // Row 1, Column 2
+ 0.0f, -0.5f, 0.5f, // Row 2, Column 1
+ 0.0f, -0.125f, 0.125f // Row 2, Column 2
+ };
+
+ std::vector<float> alpha_scales{1.f, 0.05f, 0.1f};
+ std::vector<int32_t> zerop{0, 0, 0};
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 2, 2, 3}, 0.01, 0, input_data, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 1, 3}, alpha_scales, zerop, 3,
+ alpha_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.001, 0);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 3}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(PReluTest, Input_Output_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PReluTest, Input_Alpha_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::U8>({1}, {1}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PReluTest, Invalid_Input_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
+ Tensor alpha_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST(PReluTest, Input_Output_U8_CWQ_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> scales{1.f, 1.f};
+ std::vector<int32_t> zerop{0, 0};
+ std::vector<float> dummy_data(4, 0.f);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
+ Tensor output_tensor =
+ makeInputTensor<DataType::U8>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PReluTest, Input_Output_S16_CWQ_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> scales{1.f, 1.f};
+ std::vector<int32_t> zerop{0, 0};
+ std::vector<float> dummy_data(4, 0.f);
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
+ Tensor output_tensor =
+ makeInputTensor<DataType::S16>({2, 2}, scales, zerop, 0, dummy_data, memory_manager.get());
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(PReluTest, Mixing_U8_S16_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> dummy_data(4, 0.f);
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data, memory_manager.get());
+ Tensor alpha_tensor =
+ makeInputTensor<DataType::S16>({2, 2}, 1.f, 0, dummy_data, memory_manager.get());
+ Tensor output_tensor =
+ makeInputTensor<DataType::U8>({2, 2}, 1.f, 0, dummy_data, memory_manager.get());
+
+ PRelu kernel(&input_tensor, &alpha_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pack.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.cpp
new file mode 100644
index 000000000..42aab330c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pack.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Pack::Pack(std::vector<const Tensor *> inputs, Tensor *output, const PackParams &params)
+ : KernelWithParams<PackParams>(std::move(inputs), {output}, params)
+{
+}
+
+void Pack::configure()
+{
+ LUCI_INTERPRETER_CHECK(_inputs.size() == static_cast<uint32_t>(params().values_count));
+ const Tensor *t0 = _inputs[0];
+ const int dimension_size = t0->shape().num_dims() + 1;
+ int axis = params().axis;
+ if (axis < 0)
+ {
+ axis += dimension_size;
+ }
+ LUCI_INTERPRETER_CHECK(axis >= 0 && axis <= t0->shape().num_dims());
+
+ if (t0->element_type() != DataType::S32 && t0->element_type() != DataType::FLOAT32 &&
+ t0->element_type() != DataType::U8 && t0->element_type() != DataType::S8 &&
+ t0->element_type() != DataType::S16 && t0->element_type() != DataType::S64)
+ {
+ throw std::runtime_error("Unsupported type.");
+ }
+
+ for (uint32_t i = 1; i < _inputs.size(); ++i)
+ {
+ const Tensor *tensor = _inputs[i];
+ LUCI_INTERPRETER_CHECK(tensor->element_type() == t0->element_type());
+ LUCI_INTERPRETER_CHECK(tensor->shape().num_dims() == t0->shape().num_dims());
+ for (int d = 0; d < t0->shape().num_dims(); ++d)
+ {
+ LUCI_INTERPRETER_CHECK(tensor->shape().dim(d) == t0->shape().dim(d));
+ }
+ }
+
+ Shape output_shape(dimension_size);
+ int i = 0;
+ for (int index = 0; index < dimension_size; ++index)
+ {
+ if (index == axis)
+ {
+ output_shape.dim(index) = params().values_count;
+ }
+ else
+ {
+ output_shape.dim(index) = t0->shape().dim(i++);
+ }
+ }
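+  // Example: packing three inputs of shape {2} along axis 0 yields output
+  // shape {3, 2}; along axis -1 (resolved to 1) it yields {2, 3} (see tests).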
+
+ if (t0->element_type() == DataType::U8 || t0->element_type() == DataType::S8 ||
+ t0->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == t0->zero_point());
+ LUCI_INTERPRETER_CHECK(output()->scale() == t0->scale());
+    // Guarantee that the input and output quantization params match, as we do
+    // not support requantization while packing quantized tensors.
+ for (int i = 0; i < params().values_count; i++)
+ {
+ LUCI_INTERPRETER_CHECK(_inputs[i]->zero_point() == t0->zero_point());
+ LUCI_INTERPRETER_CHECK(_inputs[i]->scale() == t0->scale());
+ }
+ }
+
+ output()->resize(output_shape);
+}
+
+void Pack::execute() const
+{
+ switch (_inputs[0]->element_type())
+ {
+ case DataType::FLOAT32:
+ evalGeneric<float>();
+ break;
+ case DataType::U8:
+ evalGeneric<uint8_t>();
+ break;
+ case DataType::S8:
+ evalGeneric<int8_t>();
+ break;
+ case DataType::S16:
+ evalGeneric<int16_t>();
+ break;
+ case DataType::S32:
+ evalGeneric<int32_t>();
+ break;
+ case DataType::S64:
+ evalGeneric<int64_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> void Pack::evalGeneric() const
+{
+ const Tensor *t0 = _inputs[0];
+ const int dimension_size = t0->shape().num_dims() + 1;
+ int axis = params().axis;
+ if (axis < 0)
+ {
+ axis += dimension_size;
+ }
+
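+  // VectorOfTensors gathers the shapes and data pointers of all inputs in the
+  // array-of-pointers form expected by the TFLite reference Pack implementation.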
+ VectorOfTensors<T, true> inputs(_inputs);
+ tflite::PackParams params{};
+ params.axis = axis;
+ params.inputs_count = _inputs.size();
+ tflite::reference_ops::Pack<T>(params, inputs.shapes(), inputs.data(), getTensorShape(output()),
+ getTensorData<T>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pack.h b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.h
new file mode 100644
index 000000000..4a2fcfd80
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_PACK_H
+#define LUCI_INTERPRETER_KERNELS_PACK_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Pack : public KernelWithParams<PackParams>
+{
+public:
+ Pack(std::vector<const Tensor *> inputs, Tensor *output, const PackParams &params);
+
+ const Tensor *input(int index) const { return _inputs[index]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void evalGeneric() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_PACK_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pack.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.test.cpp
new file mode 100644
index 000000000..d16320b78
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pack.test.cpp
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pack.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::vector<std::initializer_list<int32_t>> input_shapes,
+ std::initializer_list<int32_t> output_shape, std::vector<std::vector<T>> input_datas,
+ std::initializer_list<T> output_data, int32_t axis)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ std::vector<const Tensor *> inputs(input_datas.size());
+ std::vector<Tensor> tmp_inputs;
+ for (int i = 0; i < input_datas.size(); i++)
+ {
+ if (std::is_same<T, float>::value || std::is_same<T, int32_t>::value ||
+ std::is_same<T, int64_t>::value)
+ {
+ tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {}, ""));
+ memory_manager->allocate_memory(tmp_inputs[i]);
+ tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T));
+ }
+ else if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value)
+ {
+ tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f / 255}, {128}}, ""));
+ memory_manager->allocate_memory(tmp_inputs[i]);
+ tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T));
+ }
+ else
+ {
+ assert((std::is_same<T, int16_t>::value) && "unexpected dtype is tested");
+ tmp_inputs.push_back(Tensor(element_type, input_shapes[i], {{1.0f}, {0}}, ""));
+ memory_manager->allocate_memory(tmp_inputs[i]);
+ tmp_inputs[i].writeData(input_datas[i].data(), input_datas[i].size() * sizeof(T));
+ }
+ }
+ for (int i = 0; i < input_datas.size(); i++)
+ {
+ inputs[i] = &tmp_inputs[i];
+ }
+
+ Tensor output_tensor = makeOutputTensor(element_type);
+ if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value)
+ {
+ output_tensor = makeOutputTensor(element_type, 1.0f / 255, 128);
+ }
+ else if (std::is_same<T, int16_t>::value)
+ {
+ output_tensor = makeOutputTensor(element_type, 1.0f, 0);
+ }
+
+ PackParams params{};
+ params.axis = axis;
+ params.values_count = input_datas.size();
+ Pack kernel(inputs, &output_tensor, params);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <typename T> class PackTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<uint8_t, int8_t, int16_t, int32_t, int64_t, float>;
+TYPED_TEST_SUITE(PackTest, DataTypes);
+
+TYPED_TEST(PackTest, ThreeInputs)
+{
+ Check<TypeParam>(/*input_shapes=*/{{2}, {2}, {2}},
+ /*output_shape=*/{3, 2},
+ /*input_datas=*/
+ {{1, 4}, {2, 5}, {3, 6}},
+ /*output_data=*/
+ {1, 4, 2, 5, 3, 6}, /*axis=*/0);
+
+ SUCCEED();
+}
+
+TYPED_TEST(PackTest, NegAxis)
+{
+ Check<TypeParam>(/*input_shapes=*/{{2}, {2}, {2}},
+ /*output_shape=*/{2, 3},
+ /*input_datas=*/
+ {{1, 4}, {2, 5}, {3, 6}},
+ /*output_data=*/
+ {1, 2, 3, 4, 5, 6}, /*axis=*/-1);
+
+ SUCCEED();
+}
+
+TEST(Pack, MismatchingInputValuesCount_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input1_data{1, 4};
+ std::vector<float> input2_data{2, 5};
+ std::vector<float> input3_data{3, 6};
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data, memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data, memory_manager.get());
+ Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ PackParams params{};
+ {
+ params.axis = 0;
+ params.values_count = 2;
+
+ Pack kernel({&input1_tensor, &input2_tensor, &input3_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+ }
+}
+
+TEST(Pack, InvalidInputAxis_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input1_data{1, 4};
+ std::vector<float> input2_data{2, 5};
+ std::vector<float> input3_data{3, 6};
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2}, input1_data, memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2}, input2_data, memory_manager.get());
+ Tensor input3_tensor = makeInputTensor<DataType::FLOAT32>({2}, input3_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+ PackParams params{};
+ {
+ params.axis = 2;
+ params.values_count = 3;
+
+ Pack kernel({&input1_tensor, &input2_tensor, &input3_tensor}, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+ }
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pad.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.cpp
new file mode 100644
index 000000000..c07f6e310
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.cpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pad.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/pad.h>
+
+#include <limits>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Pad::Pad(const Tensor *input, const Tensor *paddings, Tensor *output)
+ : Kernel({input, paddings}, {output})
+{
+}
+
+void Pad::configure()
+{
+ const Shape &input_shape = input()->shape();
+ const int num_dims = input_shape.num_dims();
+
+ if (num_dims > 4)
+ throw std::runtime_error("Unsupported number of dimensions.");
+
+ assert(output()->element_type() == input()->element_type());
+ assert(paddings()->element_type() == DataType::S32);
+ // Paddings shape should be [N, 2].
+ assert(paddings()->shape().num_dims() == 2);
+ assert(paddings()->shape().dim(0) == num_dims);
+ assert(paddings()->shape().dim(1) == 2);
+
+ Shape output_shape(num_dims);
+ const auto *paddings_data = getTensorData<int32_t>(paddings());
+ for (int i = 0; i < num_dims; ++i)
+ {
+ const int32_t padding_before = paddings_data[i * 2];
+ const int32_t padding_after = paddings_data[i * 2 + 1];
+ assert(padding_before >= 0 && padding_after >= 0);
+ output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after;
+ }
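+  // Example: input shape {1, 2, 3, 1} with paddings {{0, 0}, {0, 2}, {1, 3}, {0, 0}}
+  // produces output shape {1, 4, 7, 1} (see the Pad.Uint8 test).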
+
+ output()->resize(output_shape);
+}
+
+void Pad::execute() const
+{
+ const int num_dims = input()->shape().num_dims();
+
+ tflite::PadParams params{};
+ params.left_padding_count = num_dims;
+ params.right_padding_count = num_dims;
+
+ const auto *paddings_data = getTensorData<int32_t>(paddings());
+ for (int i = num_dims - 1; i >= 0; --i)
+ {
+ params.left_padding[i] = paddings_data[i * 2];
+ params.right_padding[i] = paddings_data[i * 2 + 1];
+ }
+
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ {
+ const float pad_value = 0.0f;
+ tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<float>(input()),
+ &pad_value, getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ }
+ case DataType::U8:
+ {
+ assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
+ assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
+ const auto pad_value = static_cast<uint8_t>(output()->zero_point());
+ tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ &pad_value, getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ }
+ case DataType::S8:
+ {
+ assert(output()->zero_point() >= std::numeric_limits<int8_t>::min());
+ assert(output()->zero_point() <= std::numeric_limits<int8_t>::max());
+ const auto pad_value = static_cast<int8_t>(output()->zero_point());
+ tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<int8_t>(input()),
+ &pad_value, getTensorShape(output()),
+ getTensorData<int8_t>(output()));
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pad.h b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.h
new file mode 100644
index 000000000..e05b47f29
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_PAD_H
+#define LUCI_INTERPRETER_KERNELS_PAD_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Pad : public Kernel
+{
+public:
+ Pad(const Tensor *input, const Tensor *paddings, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *paddings() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_PAD_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pad.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.test.cpp
new file mode 100644
index 000000000..dd3ce947c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pad.test.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pad.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+float GetTolerance(float min, float max) { return (max - min) / 255.0; }
+
+TEST(Pad, Uint8)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
+ std::vector<float> input_data{-0.8, 0.2, 0.9, 0.7, 0.1, -0.3};
+ std::vector<int32_t> paddings_data{0, 0, 0, 2, 1, 3, 0, 0};
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ Pad kernel(&input_tensor, &paddings_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0, -0.8, 0.2, 0.9, 0, 0, 0, 0, 0.7, 0.1, -0.3, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 7, 1}));
+}
+
+TEST(Pad, Int8)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+ std::pair<float, int32_t> quant_param = quantizationParams<int8_t>(-1.0f, 1.0f);
+ std::vector<float> input_data{-0.2, 0.4, 0.5, -0.7, -0.1, -0.9, 0.7, 0.1, 0.2};
+ std::vector<int32_t> paddings_data{0, 0, 1, 2, 2, 1, 0, 0};
+ Tensor input_tensor = makeInputTensor<DataType::S8>(
+ {1, 3, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, quant_param.first, quant_param.second);
+
+ Pad kernel(&input_tensor, &paddings_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0, 0, 0, 0, 0, 0, 0, 0, -0.2, 0.4, 0.5, 0,
+ 0, 0, -0.7, -0.1, -0.9, 0, 0, 0, 0.7, 0.1, 0.2, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 6, 6, 1}));
+}
+
+TEST(Pad, Float)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6};
+ std::vector<int32_t> paddings_data{1, 0, 0, 2, 0, 3, 0, 0};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pad kernel(&input_tensor, &paddings_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0, 0, 0, 4, 5,
+ 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+ std::initializer_list<int32_t> ref_output_shape{2, 4, 6, 1};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.cpp
new file mode 100644
index 000000000..197cdaa69
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.cpp
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/PadV2.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/pad.h>
+
+#include <cassert>
+#include <limits>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+PadV2::PadV2(const Tensor *input, const Tensor *paddings, const Tensor *constant_values,
+ Tensor *output)
+ : Kernel({input, paddings, constant_values}, {output})
+{
+}
+
+void PadV2::configure()
+{
+ const Shape &input_shape = input()->shape();
+ const int num_dims = input_shape.num_dims();
+
+ if (num_dims > 4)
+ throw std::runtime_error("Unsupported number of dimensions.");
+
+ assert(output()->element_type() == input()->element_type());
+ assert(paddings()->element_type() == DataType::S32);
+ assert(constant_values()->element_type() == output()->element_type());
+ // Paddings shape should be [N, 2].
+ assert(paddings()->shape().num_dims() == 2);
+ assert(paddings()->shape().dim(0) == num_dims);
+ assert(paddings()->shape().dim(1) == 2);
+ // The constant_values tensor must contain exactly one element.
+ assert(constant_values()->shape().num_elements() == 1);
+
+ Shape output_shape(num_dims);
+ const auto *paddings_data = getTensorData<int32_t>(paddings());
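+ // paddings is laid out as [before_0, after_0, ..., before_{N-1}, after_{N-1}];
+ // each output dimension grows by the sum of its before/after padding.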
+ for (int i = 0; i < num_dims; ++i)
+ {
+ const int32_t padding_before = paddings_data[i * 2];
+ const int32_t padding_after = paddings_data[i * 2 + 1];
+ assert(padding_before >= 0 && padding_after >= 0);
+ output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after;
+ }
+
+ output()->resize(output_shape);
+}
+
+void PadV2::execute() const
+{
+ const int num_dims = input()->shape().num_dims();
+
+ tflite::PadParams params{};
+ params.left_padding_count = num_dims;
+ params.right_padding_count = num_dims;
+
+ const auto *paddings_data = getTensorData<int32_t>(paddings());
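+ // Copy the paddings into tflite's fixed-size per-dimension arrays; the reverse
+ // iteration order does not matter since each index is assigned independently.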
+ for (int i = num_dims - 1; i >= 0; --i)
+ {
+ params.left_padding[i] = paddings_data[i * 2];
+ params.right_padding[i] = paddings_data[i * 2 + 1];
+ }
+
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ {
+ const auto pad_value = getTensorData<float>(constant_values())[0];
+ tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<float>(input()),
+ &pad_value, getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ }
+ case DataType::U8:
+ {
+ assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
+ assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
+ const auto pad_value = getTensorData<uint8_t>(constant_values())[0];
+ tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ &pad_value, getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.h b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.h
new file mode 100644
index 000000000..48a31f584
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_PAD_V2_H
+#define LUCI_INTERPRETER_KERNELS_PAD_V2_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class PadV2 : public Kernel
+{
+public:
+ PadV2(const Tensor *input, const Tensor *paddings, const Tensor *constant_values, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *paddings() const { return _inputs[1]; }
+ const Tensor *constant_values() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_PAD_V2_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.test.cpp
new file mode 100644
index 000000000..41efaff06
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/PadV2.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/PadV2.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+float GetTolerance(float min, float max) { return (max - min) / 255.0f; }
+
+TEST(PadV2, Uint8)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
+ std::vector<float> input_data{-0.8, 0.2, 0.9, 0.7, 0.1, -0.3};
+ std::vector<int32_t> paddings_data{0, 0, 0, 2, 1, 3, 0, 0};
+ std::vector<float> constant_values_data{0.5};
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 3, 1}, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
+ Tensor constant_values = makeInputTensor<DataType::U8>(
+ {1}, quant_param.first, quant_param.second, constant_values_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ PadV2 kernel(&input_tensor, &paddings_tensor, &constant_values, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data = {
+ 0.5, -0.8, 0.2, 0.9, 0.5, 0.5, 0.5, 0.5, 0.7, 0.1, -0.3, 0.5, 0.5, 0.5, //
+ 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5}; //
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(ref_output_data, kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 7, 1}));
+}
+
+TEST(PadV2, Float)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6};
+ std::vector<int32_t> paddings_data{1, 0, 0, 2, 0, 3, 0, 0};
+ std::vector<float> constant_values_data{7};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>({4, 2}, paddings_data, memory_manager.get());
+ Tensor constant_values =
+ makeInputTensor<DataType::FLOAT32>({1}, constant_values_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ PadV2 kernel(&input_tensor, &paddings_tensor, &constant_values, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 1, 2, 3, 7, 7, 7, 4, 5,
+ 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7};
+ std::initializer_list<int32_t> ref_output_shape{2, 4, 6, 1};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pow.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.cpp
new file mode 100644
index 000000000..722c64024
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pow.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Pow::Pow(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void Pow::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type());
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type());
+
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Pow::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ eval<float>();
+ break;
+ case DataType::S32:
+ eval<int32_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> void Pow::eval() const
+{
+ tflite::ArithmeticParams params{};
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
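+ // ProcessBroadcastShapes reports whether the operand shapes differ and fills in the
+ // broadcast metadata; only then is the slow 4-D broadcasting path taken.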
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastPow4DSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Pow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pow.h b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.h
new file mode 100644
index 000000000..8ff865e40
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_POW_H
+#define LUCI_INTERPRETER_KERNELS_POW_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Pow : public Kernel
+{
+public:
+ Pow(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void eval() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_POW_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Pow.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.test.cpp
new file mode 100644
index 000000000..0e858115d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Pow.test.cpp
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pow.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class PowTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(PowTest, SimplePow)
+{
+ std::initializer_list<int32_t> base_shape = {1, 1, 3, 2};
+
+ std::vector<float> input1_data{0.3f, 2.3f, 0.9f, 0.5f, 0.8f, 1.1f};
+ std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ std::vector<float> test_outputs{0.786f, 1.2838f, 1.043f, 0.7071f, 0.8f, 1.08956f};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(base_shape));
+}
+
+TEST_F(PowTest, FloatBroadcastPow)
+{
+ std::initializer_list<int32_t> input1_shape = {1, 3};
+ std::initializer_list<int32_t> input2_shape = {3, 1};
+
+ std::vector<float> input1_data{0.3f, 2.3f, 0.9f};
+ std::vector<float> input2_data{0.2f, 0.3f, 0.4f};
+ std::vector<float> test_outputs{0.786f, 1.18126f, 0.9791f, 0.6968f, 1.28386f,
+ 0.96888f, 0.6178f, 1.3953f, 0.9587f};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f));
+}
+
+TEST_F(PowTest, IntPow)
+{
+ std::initializer_list<int32_t> base_shape = {1, 3};
+
+ std::vector<int32_t> input_data{2, 3, 4};
+ std::vector<int32_t> test_outputs{4, 27, 256};
+
+ Tensor input1_tensor =
+ makeInputTensor<DataType::S32>(base_shape, input_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::S32>(base_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(test_outputs));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(base_shape));
+}
+
+TEST_F(PowTest, Input_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::BOOL);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(PowTest, Input_Type_Mismatch_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {4}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(PowTest, Invalid_Input_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ Pow kernel(&input1_tensor, &input2_tensor, &output_tensor);
+ kernel.configure();
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.cpp
new file mode 100644
index 000000000..0c8544a65
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.cpp
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Quantize.h"
+#include "kernels/Utils.h"
+#include "PALQuantize.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+
+template <typename input_dtype> void call_requantize(const Tensor *input, Tensor *output)
+{
+ int32_t multiplier;
+ int shift;
+
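+ // Requantization preserves the represented real value:
+ //   scale_in * (q_in - zp_in) == scale_out * (q_out - zp_out),
+ // so the fixed-point multiplier/shift pair encodes scale_in / scale_out.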
+ const double effective_output_scale = input->scale() / output->scale();
+ quantizeMultiplier(effective_output_scale, &multiplier, &shift);
+
+ const auto input_shape = getTensorShape(input);
+ const auto output_shape = getTensorShape(output);
+ const auto size = tflite::MatchingFlatSize(input_shape, output_shape);
+
+ const auto input_data = getTensorData<input_dtype>(input);
+
+ switch (output->element_type())
+ {
+ case loco::DataType::S8:
+ luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+ output->zero_point(), getTensorData<int8_t>(output));
+ break;
+ case loco::DataType::U8:
+ luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+ output->zero_point(), getTensorData<uint8_t>(output));
+ break;
+ case loco::DataType::S16:
+ luci_interpreter_pal::Requantize(input_data, size, multiplier, shift, input->zero_point(),
+ output->zero_point(), getTensorData<int16_t>(output));
+ break;
+ default:
+ throw std::runtime_error("Unsupported quantized type.");
+ }
+}
+
+} // namespace
+
+Quantize::Quantize(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Quantize::configure()
+{
+ if (input()->element_type() == loco::DataType::S16)
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0);
+
+ switch (input()->element_type())
+ {
+ case loco::DataType::FLOAT32:
+ {
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::U8 ||
+ output()->element_type() == loco::DataType::S8 ||
+ output()->element_type() == loco::DataType::S16);
+ break;
+ }
+ case loco::DataType::S16:
+ case loco::DataType::S8:
+ case loco::DataType::U8:
+ {
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::S8 ||
+ output()->element_type() == loco::DataType::U8 ||
+ output()->element_type() == loco::DataType::S16);
+ if (output()->element_type() == loco::DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(output()->zero_point() == 0);
+ }
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+
+ output()->resize(input()->shape());
+}
+
+void Quantize::execute() const
+{
+ switch (input()->element_type())
+ {
+ case loco::DataType::FLOAT32:
+ {
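+ // Float path: q = zero_point + round(x / scale), saturated to the output type's range.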
+ tflite::QuantizationParams op_params;
+ op_params.zero_point = output()->zero_point();
+ op_params.scale = output()->scale();
+ const auto input_data = getTensorData<float>(input());
+
+ switch (output()->element_type())
+ {
+ case loco::DataType::S8:
+ {
+ luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+ getTensorShape(output()), getTensorData<int8_t>(output()));
+ break;
+ }
+ case loco::DataType::U8:
+ {
+ luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+ getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ }
+ case loco::DataType::S16:
+ {
+ luci_interpreter_pal::Quantize(op_params, getTensorShape(input()), input_data,
+ getTensorShape(output()),
+ getTensorData<int16_t>(output()));
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+ break;
+ }
+ case loco::DataType::S16:
+ {
+ call_requantize<int16_t>(input(), output());
+ break;
+ }
+ case loco::DataType::S8:
+ {
+ call_requantize<int8_t>(input(), output());
+ break;
+ }
+ case loco::DataType::U8:
+ {
+ call_requantize<uint8_t>(input(), output());
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.h b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.h
new file mode 100644
index 000000000..006c5366f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_QUANTIZE_H
+#define LUCI_INTERPRETER_KERNELS_QUANTIZE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Quantize : public Kernel
+{
+public:
+ Quantize(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_QUANTIZE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.test.cpp
new file mode 100644
index 000000000..22e67fe3f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Quantize.test.cpp
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Quantize.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class QuantizeTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(QuantizeTest, FloatUint8)
+{
+ std::vector<float> input_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+ std::vector<uint8_t> ref_output_data{0, 1, 2, 3, 4, 251, 252, 253, 254, 255};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, /*scale*/ 0.5, /*zero_point*/ 127);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(QuantizeTest, FloatInt8)
+{
+ std::vector<float> input_data{-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64};
+
+ std::vector<int8_t> ref_output_data{-128, -127, -126, -125, -124, 123, 124, 125, 126, 127};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(QuantizeTest, FloatInt16)
+{
+ std::vector<float> input_data{-63.5, -63, -3, -2, -1, 1, 2, 3, 63.5, 64};
+
+ std::vector<int16_t> ref_output_data{-12700, -12600, -600, -400, -200,
+ 200, 400, 600, 12700, 12800};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, /*scale*/ 0.005, /*zero_point*/ 0);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int16_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 5}));
+}
+
+TEST_F(QuantizeTest, Int16Int16)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ std::vector<int16_t> ref_output_data{2, 4, 6, 8, 10, 12, 14, 16, 18, 20};
+
+ Tensor input_tensor = makeInputTensor<DataType::S16>(
+ {1, 1, 2, 5}, /*scale*/ 1.0, /*zero_point*/ 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, /*scale*/ 0.5, /*zero_point*/ 0);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int16_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, Int8Int8)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ std::vector<int8_t> ref_output_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+
+ Tensor input_tensor = makeInputTensor<DataType::S8>(
+ {1, 1, 2, 5}, /*scale*/ 0.5, /*zero_point*/ -1, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, Uint8Uint8)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ std::vector<uint8_t> ref_output_data{129, 131, 133, 135, 137, 139, 141, 143, 145, 147};
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 1, 2, 5}, /*scale*/ 0.5, /*zero_point*/ 127, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, /*scale*/ 0.5, /*zero_point*/ 127);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, Int16Int8)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ std::vector<int8_t> ref_output_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 19};
+
+ Tensor input_tensor = makeInputTensor<DataType::S16>(
+ {1, 1, 2, 5}, /*scale*/ 1.0, /*zero_point*/ 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 5}));
+}
+
+TEST_F(QuantizeTest, InvalidInputType_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S8, /*scale*/ 0.5, /*zero_point*/ -1);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForFloatInput_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 2, 5}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForInt16Input_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForInt8Input_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidOutputTypeForUint8Input_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({1, 1, 2, 5}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(QuantizeTest, InvalidInputZeroPoint_NEG)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 1, 2, 5}, 0.5, -1, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+
+ Quantize kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.cpp
new file mode 100644
index 000000000..747ec6cc8
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.cpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Relu.h"
+#include "kernels/Utils.h"
+
+#include "PALRelu.h"
+
+#include <algorithm>
+#include <limits>
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Relu::Relu(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Relu::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ if (input()->element_type() == DataType::S16)
+ {
+ LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0);
+ }
+
+ if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16)
+ {
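+ // Precompute the fixed-point multiplier/shift pair encoding input_scale / output_scale,
+ // used by the quantized evaluation paths.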
+ double multiplier = input()->scale() / output()->scale();
+ quantizeMultiplier(multiplier, &_output_multiplier, &_output_shift);
+ }
+ output()->resize(input()->shape());
+}
+
+void Relu::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Relu::evalFloat() const
+{
+ const auto input_data = getTensorData<float>(input());
+ const auto input_shape = getTensorShape(input());
+ auto output_data = getTensorData<float>(output());
+ auto output_shape = getTensorShape(output());
+
+ luci_interpreter_pal::Relu(input_shape, input_data, output_shape, output_data);
+}
+
+void Relu::evalQuantized() const
+{
+ tflite::ReluParams params;
+ params.input_offset = input()->zero_point();
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = _output_multiplier;
+ params.output_shift = _output_shift;
+
+ params.quantized_activation_min =
+ std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset);
+ params.quantized_activation_max = static_cast<int32_t>(std::numeric_limits<uint8_t>::max());
+
+ luci_interpreter_pal::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+void Relu::evalQuantizedS16() const
+{
+ const auto *input_data = getTensorData<int16_t>(input());
+ auto *output_data = getTensorData<int16_t>(output());
+
+ constexpr int32_t output_min = 0;
+ constexpr int32_t output_max = std::numeric_limits<int16_t>::max();
+
+ const int32_t num_elements = input()->shape().num_elements();
+
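+ // Rescale each value into the output scale, then clamp to [0, INT16_MAX]: this is
+ // ReLU in the quantized domain (zero_point is checked to be 0 in configure()).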
+ for (int32_t i = 0; i < num_elements; ++i)
+ {
+ const int32_t input_val = input_data[i];
+ int32_t output_val =
+ tflite::MultiplyByQuantizedMultiplier(input_val, _output_multiplier, _output_shift);
+ output_val = std::max(output_val, output_min);
+ output_val = std::min(output_val, output_max);
+ output_data[i] = static_cast<int16_t>(output_val);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu.h b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.h
new file mode 100644
index 000000000..b813f0cdf
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RELU_H
+#define LUCI_INTERPRETER_KERNELS_RELU_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Relu : public Kernel
+{
+public:
+ Relu(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void evalQuantizedS16() const;
+
+private:
+ int32_t _output_multiplier{0};
+ int32_t _output_shift{0};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RELU_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.test.cpp
new file mode 100644
index 000000000..bd32e3cc9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu.test.cpp
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Relu.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ReluTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(ReluTest, FloatSimple)
+{
+ std::vector<float> input_data{
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
+ };
+
+ std::vector<float> ref_output_data{
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, 0.0f, 0.0f, // Row 2
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Relu kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(ReluTest, Uint8Quantized)
+{
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 7, 1, //
+ };
+ // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+ const float f_min = (-128.0 / 128.0) * 8;
+ const float f_max = (127.0 / 128.0) * 8;
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(f_min, f_max);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ Relu kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({128, 128, 160, 192, 176, 128, 240, 144}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0, 0, 2, 4, 3, 0, 7, 1}));
+}
+
+TEST_F(ReluTest, Uint8Requantized)
+{
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 7, 1, //
+ };
+
+ // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+ const float in_min = (-128.0 / 128.0) * 8;
+ const float in_max = (127.0 / 128.0) * 8;
+ const float out_min = (0.0 / 256.0) * 8;
+ const float out_max = (255.0 / 256.0) * 8;
+
+ std::pair<float, int32_t> quant_input = quantizationParams<uint8_t>(in_min, in_max);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_input.first, quant_input.second, input_data, _memory_manager.get());
+
+ std::pair<float, int32_t> quant_output = quantizationParams<uint8_t>(out_min, out_max);
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_output.first, quant_output.second);
+
+ Relu kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({0, 0, 64, 128, 96, 0, 224, 32}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0, 0, 2, 4, 3, 0, 7, 1}));
+}
+
+TEST_F(ReluTest, SInt16)
+{
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 7, 1, //
+ };
+ std::vector<float> ref_output_data{
+ 0, 0, 2, 4, //
+ 3, 0, 7, 1, //
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 2, 4, 1}, 0.5, 0, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.25, 0);
+
+ Relu kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(ReluTest, Input_Output_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Relu kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(ReluTest, Invalid_Input_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ Relu kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.cpp
new file mode 100644
index 000000000..07205ed3a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Relu6.h"
+#include "kernels/Utils.h"
+
+#include "PALRelu6.h"
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Relu6::Relu6(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Relu6::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+ if (input()->element_type() == DataType::U8)
+ {
+ double multiplier = input()->scale() / output()->scale();
+ quantizeMultiplier(multiplier, &_output_multiplier, &_output_shift);
+ }
+ output()->resize(input()->shape());
+}
+
+void Relu6::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Relu6::evalFloat() const
+{
+ const auto input_data = getTensorData<float>(input());
+ const auto input_shape = getTensorShape(input());
+ auto output_data = getTensorData<float>(output());
+ auto output_shape = getTensorShape(output());
+
+ luci_interpreter_pal::Relu6(input_shape, input_data, output_shape, output_data);
+}
+
+void Relu6::evalQuantized() const
+{
+ tflite::ReluParams params;
+ params.input_offset = input()->zero_point();
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = _output_multiplier;
+ params.output_shift = _output_shift;
+
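+ // The clamp bounds encode the real interval [0, 6] in the output domain: the zero
+ // point for 0, zero_point + round(6 / scale) for 6, both limited to the uint8 range.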
+ params.quantized_activation_min =
+ std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset);
+ params.quantized_activation_max =
+ std::min(static_cast<int32_t>(std::numeric_limits<uint8_t>::max()),
+ params.output_offset + static_cast<int32_t>(roundf(6.f / output()->scale())));
+
+ luci_interpreter_pal::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.h b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.h
new file mode 100644
index 000000000..f5030b588
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RELU6_H
+#define LUCI_INTERPRETER_KERNELS_RELU6_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Relu6 : public Kernel
+{
+public:
+ Relu6(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+
+private:
+ int32_t _output_multiplier{0};
+ int32_t _output_shift{0};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RELU6_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.test.cpp
new file mode 100644
index 000000000..af7b3f3db
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Relu6.test.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Relu6.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class Relu6Test : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(Relu6Test, FloatSimple)
+{
+ std::vector<float> input_data{
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 7.0f, -1.0f, -2.0f, // Row 2
+ };
+
+ std::vector<float> ref_output_data{
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 6.0f, 0.0f, 0.0f, // Row 2
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 3}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Relu6 kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ref_output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3}));
+}
+
+TEST_F(Relu6Test, Uint8Quantized)
+{
+ // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+ const float f_min = (-128.0 / 128.0) * 10;
+ const float f_max = (127.0 / 128.0) * 10;
+ const float tolerance = (f_max - f_min) / 255.0;
+
+ std::vector<float> input_data{
+ 0, -6, 2, 8, //
+ -2, 3, 7, 1, //
+ };
+
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(f_min, f_max);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_param.first, quant_param.second, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ Relu6 kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({128, 128, 154, 205, 128, 166, 205, 141}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear({0, 0, 2, 6, 0, 3, 6, 1}, tolerance));
+}
+
+TEST_F(Relu6Test, Uint8Requantized)
+{
+ // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors.
+ const float in_min = (-128.0 / 128.0) * 10;
+ const float in_max = (127.0 / 128.0) * 10;
+ const float out_min = (0.0 / 256.0) * 0;
+ const float out_max = (255.0 / 256.0) * 6;
+ const float tolerance = (in_max - in_min) / 255.0;
+
+ std::vector<float> input_data{
+ 0, -6, 2, 8, //
+ -2, 3, 7, 1, //
+ };
+
+ std::pair<float, int32_t> quant_input = quantizationParams<uint8_t>(in_min, in_max);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 4, 1}, quant_input.first, quant_input.second, input_data, _memory_manager.get());
+
+ std::pair<float, int32_t> quant_output = quantizationParams<uint8_t>(out_min, out_max);
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_output.first, quant_output.second);
+
+ Relu6 kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1}));
+ EXPECT_THAT(extractTensorData<uint8_t>(output_tensor),
+ ::testing::ElementsAreArray({0, 0, 87, 255, 0, 127, 255, 43}));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear({0, 0, 2, 6, 0, 3, 6, 1}, tolerance));
+}
+
+TEST_F(Relu6Test, Input_Output_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Relu6 kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(Relu6Test, Invalid_Input_Type_NEG)
+{
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ Relu6 kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.cpp
new file mode 100644
index 000000000..61d3300b2
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Reshape.h"
+
+#include <cassert>
+#include <cstring>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+static Shape extractShapeFromTensor(const Tensor *tensor)
+{
+ assert(tensor->element_type() == DataType::S32);
+ Shape shape(tensor->shape().num_elements());
+ const auto *shape_data = tensor->data<int32_t>();
+ for (int i = 0; i < tensor->shape().num_elements(); ++i)
+ {
+ shape.dim(i) = shape_data[i];
+ }
+ return shape;
+}
+
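+// Resolves a single -1 ("unknown") dimension from the number of input elements.
+// Example: reshaping 12 input elements to {2, -1, 2} resolves the -1 to
+// 12 / (2 * 2) = 3, giving an output shape of {2, 3, 2}.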
+static void resolveUnknownDimension(const Shape &input_shape, Shape *output_shape)
+{
+ const int32_t num_input_elements = input_shape.num_elements();
+ int32_t num_output_elements = 1;
+ int unknown_dim_index = -1;
+ for (int i = 0; i < output_shape->num_dims(); ++i)
+ {
+ const int32_t value = output_shape->dim(i);
+ if (value == -1)
+ {
+ assert(unknown_dim_index == -1);
+ unknown_dim_index = i;
+ }
+ else
+ {
+ num_output_elements *= value;
+ }
+ }
+ if (unknown_dim_index != -1)
+ {
+ output_shape->dim(unknown_dim_index) = num_input_elements / num_output_elements;
+ num_output_elements *= output_shape->dim(unknown_dim_index);
+ }
+ assert(num_output_elements == num_input_elements);
+}
+
+Reshape::Reshape(const Tensor *input, const Tensor *shape, Tensor *output)
+ : Kernel({input, shape}, {output})
+{
+}
+
+void Reshape::configure()
+{
+ Shape output_shape = extractShapeFromTensor(shape());
+ resolveUnknownDimension(input()->shape(), &output_shape);
+ output()->resize(output_shape);
+}
+
+void Reshape::execute() const
+{
+ const auto *input_data = input()->data<void>();
+ auto *output_data = output()->data<void>();
+
+ const size_t element_size = getDataTypeSize(input()->element_type());
+ const int32_t num_elements = input()->shape().num_elements();
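+  // A reshape never reorders elements; the flat buffer is bit-identical, so a
+  // single byte copy of num_elements * element_size bytes suffices.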
+ std::memcpy(output_data, input_data, num_elements * element_size);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.h b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.h
new file mode 100644
index 000000000..99b947f77
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RESHAPE_H
+#define LUCI_INTERPRETER_KERNELS_RESHAPE_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Reshape : public Kernel
+{
+public:
+ Reshape(const Tensor *input, const Tensor *shape, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *shape() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RESHAPE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.test.cpp
new file mode 100644
index 000000000..c2ff3ea1b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Reshape.test.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Reshape.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ReshapeTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// TODO Test types other than FLOAT32.
+
+TEST_F(ReshapeTest, Regular)
+{
+ Shape input_shape{1, 2, 2, 3};
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ Shape shape_shape{2};
+ std::vector<int32_t> shape_data{3, 4};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor shape_tensor =
+ makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Reshape kernel(&input_tensor, &shape_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data));
+}
+
+TEST_F(ReshapeTest, UnknownDimension)
+{
+ Shape input_shape{2, 1, 2, 3};
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ Shape shape_shape{3};
+ std::vector<int32_t> shape_data{2, -1, 2};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor shape_tensor =
+ makeInputTensor<DataType::S32>(shape_shape, shape_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Reshape kernel(&input_tensor, &shape_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp
new file mode 100644
index 000000000..e2ddd6a7b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ResizeBilinear.h"
+
+#include "kernels/Utils.h"
+
+#include "PALResizeBilinear.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ResizeBilinear::ResizeBilinear(const Tensor *input, const Tensor *size, Tensor *output,
+ const ResizeBilinearParams &params)
+ : KernelWithParams<ResizeBilinearParams>({input, size}, {output}, params)
+{
+}
+
+void ResizeBilinear::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(size()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(size()->element_type() == DataType::S32);
+ if (params().half_pixel_centers && params().align_corners)
+ throw std::runtime_error("If half_pixel_centers is True, align_corners must be False.");
+ LUCI_INTERPRETER_CHECK(size()->shape().dim(0) == 2);
+ Shape output_shape(4);
+ output_shape.dim(0) = input()->shape().dim(0);
+ output_shape.dim(1) = getTensorData<int32_t>(size())[0];
+ output_shape.dim(2) = getTensorData<int32_t>(size())[1];
+ output_shape.dim(3) = input()->shape().dim(3);
+ output()->resize(output_shape);
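+  // Example: a {2, 2, 2, 1} input with size = {3, 3} is resized to {2, 3, 3, 1}.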
+}
+
+void ResizeBilinear::execute() const
+{
+ tflite::ResizeBilinearParams op_params{};
+ op_params.align_corners = params().align_corners;
+ op_params.half_pixel_centers = params().half_pixel_centers;
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
+ luci_interpreter_pal::ResizeBilinear(
+ op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(size()),
+ getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ luci_interpreter_pal::ResizeBilinear(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()),
+ getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.h b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.h
new file mode 100644
index 000000000..b7bdc2ab7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H
+#define LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ResizeBilinear : public KernelWithParams<ResizeBilinearParams>
+{
+public:
+ ResizeBilinear(const Tensor *input, const Tensor *shape, Tensor *output,
+ const ResizeBilinearParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *size() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.test.cpp
new file mode 100644
index 000000000..933a1128c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeBilinear.test.cpp
@@ -0,0 +1,255 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ResizeBilinear.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> size_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data,
+ std::initializer_list<int32_t> size_data, std::initializer_list<float> output_data,
+ bool align_corners, bool half_pixel_centers)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeBilinearParams params{};
+ params.align_corners = align_corners;
+ params.half_pixel_centers = half_pixel_centers;
+
+ ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
+}
+
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> size_shape,
+ std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data,
+ std::initializer_list<int32_t> size_data,
+ std::initializer_list<float> output_data, bool align_corners,
+ bool half_pixel_centers)
+{
+  // The TFLite example uses the uint8 values directly, which corresponds to
+  // quantization parameters scale = 1.0f and zero point = 0.
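+  // Hence dequantizeTensorData below returns the stored uint8 values unchanged.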
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, 1.0, 0, input_data, memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1.0, 0);
+
+ ResizeBilinearParams params{};
+ params.align_corners = align_corners;
+ params.half_pixel_centers = half_pixel_centers;
+
+ ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, output_tensor.scale()));
+}
+
+template <typename T> class ResizeBilinearTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(ResizeBilinearTest, DataTypes);
+
+TYPED_TEST(ResizeBilinearTest, SimpleTest)
+{
+ Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ {3, 3},
+ {
+ 3, 5, 6, //
+ 7, 9, 10, //
+ 9, 11, 12, //
+ 4, 8, 10, //
+ 8, 12, 14, //
+ 10, 14, 16, //
+ },
+ false, false);
+ SUCCEED();
+}
+
+TEST(ResizeBilinearTest, HalfPixelCenterFloatTest)
+{
+ Check<float>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 1, 2, //
+ 3, 4, //
+ 1, 2, //
+ 3, 4 //
+ },
+ {3, 3},
+ {
+ 1, 1.5, 2, //
+ 2, 2.5, 3, //
+ 3, 3.5, 4, //
+ 1, 1.5, 2, //
+ 2, 2.5, 3, //
+ 3, 3.5, 4, //
+ },
+ false, true);
+ SUCCEED();
+}
+
+TEST(ResizeBilinearTest, HalfPixelCenterUint8Test)
+{
+ Check<uint8_t>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 12, 16 //
+ },
+ {3, 3},
+ {
+ 2, 4, 6, //
+ 6, 7, 9, //
+ 9, 10, 12, //
+ 4, 7, 10, //
+ 8, 10, 13, //
+ 12, 14, 16, //
+ },
+ false, true);
+ SUCCEED();
+}
+
+TEST(ResizeBilinearTest, InputShapeInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeBilinearParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeBilinearTest, SizeShapeInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeBilinearParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeBilinearTest, SizeDimInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeBilinearParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeBilinearTest, InvalidParams_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeBilinearParams params{};
+ params.align_corners = true;
+ params.half_pixel_centers = true;
+
+ ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp
new file mode 100644
index 000000000..306cefbc2
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ResizeNearestNeighbor.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/resize_nearest_neighbor.h>
+#include "PALResizeNearestNeighbor.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ResizeNearestNeighbor::ResizeNearestNeighbor(const Tensor *input, const Tensor *size,
+ Tensor *output,
+ const ResizeNearestNeighborParams &params)
+ : KernelWithParams<ResizeNearestNeighborParams>({input, size}, {output}, params)
+{
+}
+
+void ResizeNearestNeighbor::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(size()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(size()->element_type() == DataType::S32);
+ LUCI_INTERPRETER_CHECK(size()->shape().dim(0) == 2);
+ Shape output_shape(4);
+ output_shape.dim(0) = input()->shape().dim(0);
+ output_shape.dim(1) = getTensorData<int32_t>(size())[0];
+ output_shape.dim(2) = getTensorData<int32_t>(size())[1];
+ output_shape.dim(3) = input()->shape().dim(3);
+ output()->resize(output_shape);
+}
+
+void ResizeNearestNeighbor::execute() const
+{
+ tflite::ResizeNearestNeighborParams op_params{};
+ op_params.align_corners = params().align_corners;
+ op_params.half_pixel_centers = params().half_pixel_centers;
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
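+      // Note: the reference kernel only copies elements, so viewing the FLOAT32
+      // buffers through a same-size int32_t pointer is bit-exact here.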
+ tflite::reference_ops::ResizeNearestNeighbor(
+ op_params, getTensorShape(input()), getTensorData<int32_t>(input()), getTensorShape(size()),
+ getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<int32_t>(output()));
+ break;
+ case DataType::U8:
+ luci_interpreter_pal::ResizeNearestNeighbor(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(size()),
+ getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.h b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.h
new file mode 100644
index 000000000..137d031cf
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H
+#define LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ResizeNearestNeighbor : public KernelWithParams<ResizeNearestNeighborParams>
+{
+public:
+ ResizeNearestNeighbor(const Tensor *input, const Tensor *shape, Tensor *output,
+ const ResizeNearestNeighborParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *size() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp
new file mode 100644
index 000000000..7ade02a6f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ResizeNearestNeighbor.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> size_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data,
+ std::initializer_list<int32_t> size_data, std::initializer_list<float> output_data,
+ bool align_corners, bool half_pixel_centers)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = align_corners;
+ params.half_pixel_centers = half_pixel_centers;
+
+ ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
+}
+
+template <>
+void Check<uint8_t>(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> size_shape,
+ std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data,
+ std::initializer_list<int32_t> size_data,
+ std::initializer_list<float> output_data, bool align_corners,
+ bool half_pixel_centers)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::pair<float, int32_t> quant_param =
+ quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f,
+ std::max(input_data) > 0 ? std::max(input_data) : 0.f);
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ input_shape, quant_param.first, quant_param.second, input_data, memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
+  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = align_corners;
+ params.half_pixel_centers = half_pixel_centers;
+
+ ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, output_tensor.scale()));
+}
+
+template <typename T> class ResizeNearestNeighborTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(ResizeNearestNeighborTest, DataTypes);
+
+TYPED_TEST(ResizeNearestNeighborTest, SimpleTest)
+{
+ Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ {3, 3},
+ {
+ 3, 3, 6, //
+ 3, 3, 6, //
+ 9, 9, 12, //
+ 4, 4, 10, //
+ 4, 4, 10, //
+ 10, 10, 16, //
+ },
+ false, false);
+}
+
+TYPED_TEST(ResizeNearestNeighborTest, AlignCenterTest)
+{
+ Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ {3, 3},
+ {
+ 3, 6, 6, //
+ 9, 12, 12, //
+ 9, 12, 12, //
+ 4, 10, 10, //
+ 10, 16, 16, //
+ 10, 16, 16, //
+ },
+ true, false);
+}
+
+TYPED_TEST(ResizeNearestNeighborTest, HalfPixelCenterTest)
+{
+ Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ {3, 3},
+ {
+ 3, 6, 6, //
+ 9, 12, 12, //
+ 9, 12, 12, //
+ 4, 10, 10, //
+ 10, 16, 16, //
+ 10, 16, 16, //
+ },
+ false, true);
+}
+
+TEST(ResizeNearestNeighborTest, InputShapeInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeNearestNeighborTest, SizeShapeInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(ResizeNearestNeighborTest, SizeDimInvalid_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1},
+ {
+ 3, 6, //
+ 9, 12, //
+ 4, 10, //
+ 10, 16 //
+ },
+ memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = false;
+ params.half_pixel_centers = false;
+
+ ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.cpp
new file mode 100644
index 000000000..1b6a5cc3b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ReverseV2.h"
+#include "kernels/Utils.h"
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+ReverseV2::ReverseV2(const Tensor *input, const Tensor *axes, Tensor *output)
+ : Kernel({input, axes}, {output})
+{
+}
+
+void ReverseV2::configure()
+{
+ assert(axes()->shape().num_dims() == 1);
+ assert(input()->shape().num_dims() >= axes()->shape().num_elements());
+ if (input()->element_type() != DataType::S32 && input()->element_type() != DataType::FLOAT32 &&
+ input()->element_type() != DataType::U8 && input()->element_type() != DataType::S16 &&
+ input()->element_type() != DataType::S64)
+ {
+ throw std::runtime_error("Unsupported input type.");
+ }
+ if (axes()->element_type() != DataType::S32)
+ {
+ throw std::runtime_error("Unsupported axes type.");
+ }
+ if (axes()->shape().num_elements() > 1)
+ {
+ throw std::runtime_error("Current implementation does not support more than 1 axis.");
+ }
+ int axis_value = getTensorData<int32_t>(axes())[0];
+ if (axis_value < 0 || axis_value >= input()->shape().num_dims())
+ {
+ throw std::runtime_error("Invalid axes value");
+ }
+ assert(input()->element_type() == output()->element_type());
+
+ output()->resize(input()->shape());
+}
+
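+// Example: for a {4, 3, 2} input with axes = {1}, each consecutive group of
+// three rows is emitted in reverse order; the output shape is unchanged.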
+void ReverseV2::execute() const
+{
+ int axis_value = getTensorData<int32_t>(axes())[0];
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::reference_ops::Reverse<float>(axis_value, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ tflite::reference_ops::Reverse<uint8_t>(
+ axis_value, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported output type");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.h b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.h
new file mode 100644
index 000000000..51211c703
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_REVERSE_H
+#define LUCI_INTERPRETER_KERNELS_REVERSE_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ReverseV2 : public Kernel
+{
+public:
+ ReverseV2(const Tensor *input, const Tensor *axes, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *axes() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_REVERSE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.test.cpp
new file mode 100644
index 000000000..c0025faca
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/ReverseV2.test.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ReverseV2.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T> class ReverseV2Test : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(ReverseV2Test, DataTypes);
+
+TYPED_TEST(ReverseV2Test, MultiDimensions)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::vector<TypeParam> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24};
+ Shape input_shape{4, 3, 2};
+ std::vector<int32_t> axis_data{1};
+ Shape axis_shape{1};
+
+ std::vector<TypeParam> output_data{5, 6, 3, 4, 1, 2, 11, 12, 9, 10, 7, 8,
+ 17, 18, 15, 16, 13, 14, 23, 24, 21, 22, 19, 20};
+ std::vector<int32_t> output_shape{4, 3, 2};
+
+ Tensor input_tensor =
+ makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>(axis_shape, axis_data, memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
+
+ ReverseV2 kernel = ReverseV2(&input_tensor, &axis_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
+ ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.cpp
new file mode 100644
index 000000000..6dd92dc98
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Rsqrt.h"
+#include "kernels/Utils.h"
+
+#include <stdexcept>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Rsqrt::Rsqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Rsqrt::configure()
+{
+ if (input()->element_type() != output()->element_type())
+ {
+ throw std::runtime_error("Input/output tensor data type mismatch.");
+ }
+ output()->resize(input()->shape());
+}
+
+void Rsqrt::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Rsqrt::evalFloat() const
+{
+ auto in = getTensorData<float>(input());
+ auto out = getTensorData<float>(output());
+ auto size = getTensorShape(input()).FlatSize();
+ for (auto i = in; i != in + size; ++i)
+ {
+ *out = 1.f / std::sqrt(*i);
+ ++out;
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.h b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.h
new file mode 100644
index 000000000..adc5bcfa2
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RSQRT_H
+#define LUCI_INTERPRETER_KERNELS_RSQRT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Rsqrt : public Kernel
+{
+public:
+ Rsqrt(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RSQRT_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.test.cpp
new file mode 100644
index 000000000..3c6494232
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Rsqrt.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Rsqrt.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Rsqrt kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(RsqrtTest, SimpleRsqrt)
+{
+ Check(
+ /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+ /*input_data=*/
+ {
+ 5, 4, 8, 2, //
+ 6, 7.5, 9, 0.3, //
+ },
+ /*output_data=*/
+ {
+ 0.44721360, 0.5, 0.35355339, 0.70710678, //
+ 0.40824829, 0.36514837, 0.33333333, 1.8257419, //
+ });
+}
+
+TEST(RsqrtTest, Input_Output_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ Rsqrt kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(RsqrtTest, Invalid_Input_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ Rsqrt kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.cpp
new file mode 100644
index 000000000..40d79aaa3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.cpp
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SVDF.h"
+#include "kernels/Utils.h"
+#include "PALSVDF.h"
+
+#include <tensorflow/lite/kernels/internal/quantization_util.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+TfLiteFusedActivation get_tflite_activation(Activation activation)
+{
+ switch (activation)
+ {
+ case luci::FusedActFunc::RELU:
+ return kTfLiteActRelu;
+ case luci::FusedActFunc::RELU6:
+ return kTfLiteActRelu6;
+ case luci::FusedActFunc::RELU_N1_TO_1:
+ return kTfLiteActReluN1To1;
+ case luci::FusedActFunc::TANH:
+ return kTfLiteActTanh;
+ case luci::FusedActFunc::SIGN_BIT:
+ return kTfLiteActSignBit;
+ case luci::FusedActFunc::NONE:
+ return kTfLiteActNone;
+ default:
+ throw std::runtime_error("Unsupported activation type");
+ }
+}
+} // namespace
+
+SVDF::SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time,
+ const Tensor *bias, const Tensor *input_activation_state, Tensor *output,
+ Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2,
+ Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6,
+ const SVDFParams &params)
+ : KernelWithParams<SVDFParams>({input, weight_feature, weight_time, bias, input_activation_state},
+ {output, scratchpad_activation_state, scratchpad_1, scratchpad_2,
+ scratchpad_3, scratchpad_4, scratchpad_5, scratchpad_6},
+ params)
+{
+ // Do nothing
+}
+
+void SVDF::configure()
+{
+ const Shape &input_shape = input()->shape();
+ const Shape &weight_features_shape = weight_feature()->shape();
+ const Shape &weight_time_shape = weight_time()->shape();
+
+ // Validate Input Tensor:
+ LUCI_INTERPRETER_CHECK(input()->element_type() == loco::DataType::FLOAT32 ||
+ input()->element_type() == loco::DataType::S8);
+ LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 2);
+
+ // Validate inputs and output types
+ if (input()->element_type() == loco::DataType::S8)
+ {
+ LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == loco::DataType::S8);
+ LUCI_INTERPRETER_CHECK(weight_time()->element_type() == loco::DataType::S16 ||
+ weight_time()->element_type() == loco::DataType::S8);
+ if (bias())
+ LUCI_INTERPRETER_CHECK(bias()->element_type() == loco::DataType::S32);
+
+ LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == loco::DataType::S16 ||
+ input_activation_state()->element_type() == loco::DataType::S8);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::S8);
+
+    // Note: TFLite currently supports only the ReLU activation for integer SVDF.
+ LUCI_INTERPRETER_CHECK(params().activation == luci::FusedActFunc::RELU);
+ }
+ else if (weight_feature()->element_type() == loco::DataType::FLOAT32)
+ {
+ LUCI_INTERPRETER_CHECK(weight_feature()->element_type() == loco::DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(weight_time()->element_type() == loco::DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(input_activation_state()->element_type() == loco::DataType::FLOAT32);
+ if (bias())
+ LUCI_INTERPRETER_CHECK(bias()->element_type() == loco::DataType::FLOAT32);
+ LUCI_INTERPRETER_CHECK(output()->element_type() == loco::DataType::FLOAT32);
+ }
+ else if ((weight_feature()->element_type() == loco::DataType::U8 ||
+ weight_feature()->element_type() == loco::DataType::S8) &&
+ input()->element_type() == loco::DataType::FLOAT32)
+ {
+    // TODO: Support hybrid SVDF op.
+ throw std::runtime_error("Hybrid type is not currently supported");
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported type.");
+ }
+
+  // Check that all tensor parameters are consistent with each other and with
+  // the input configuration.
+ const int rank = params().svdf_rank;
+ const int batch_size = input_shape.dim(0);
+ const int num_filters = weight_features_shape.dim(0);
+ LUCI_INTERPRETER_CHECK(rank != 0);
+ LUCI_INTERPRETER_CHECK(num_filters % rank == 0);
+
+ const int num_units = num_filters / rank;
+ const int memory_size = weight_time_shape.dim(1);
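+  // For example, with rank = 1 and num_filters = 4 there are num_units = 4
+  // outputs, and each filter keeps memory_size past activations per batch.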
+
+ // Validate Weight_Feature Input Tensor:
+ LUCI_INTERPRETER_CHECK(weight_features_shape.num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(weight_features_shape.dim(1) == input_shape.dim(1));
+
+ // Validate Weight_Time Input Tensor:
+ LUCI_INTERPRETER_CHECK(weight_time_shape.num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(weight_time_shape.dim(0) == num_filters);
+
+ // Validate Bias
+ if (bias())
+ LUCI_INTERPRETER_CHECK(bias()->shape().dim(0) == num_units);
+
+ // Validate Input Activation State
+ LUCI_INTERPRETER_CHECK(input_activation_state()->shape().num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(0) == batch_size);
+ LUCI_INTERPRETER_CHECK(input_activation_state()->shape().dim(1) == memory_size * num_filters);
+
+  // Resize the scratchpad state to match input_activation_state.
+ auto scratchpad_activation_state = getOutputTensors()[1];
+ scratchpad_activation_state->resize({batch_size, memory_size * num_filters});
+
+ // Resize output tensor
+ output()->resize({batch_size, num_units});
+
+ luci_interpreter_pal::SetupScratchpadTensor(
+ input()->element_type(), weight_feature()->element_type(), getOutputTensors()[2],
+ getOutputTensors()[3], getOutputTensors()[4], getOutputTensors()[5], getOutputTensors()[6],
+ getOutputTensors()[7], input_shape, weight_time_shape, batch_size, num_filters, num_units);
+}
+
+void SVDF::execute() const
+{
+ switch (weight_feature()->element_type())
+ {
+ case loco::DataType::FLOAT32:
+ evalFloat();
+ break;
+ case loco::DataType::S8:
+ {
+ if (input()->element_type() == loco::DataType::S8)
+ evalInteger();
+ else
+        // TODO: Support hybrid SVDF op.
+ throw std::runtime_error("Hybrid type is not currently supported");
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type");
+ }
+}
+
+void SVDF::evalInteger() const
+{
+ const auto effective_scale_1 = static_cast<double>(input()->scale() * weight_feature()->scale() /
+ input_activation_state()->scale());
+ const auto effective_scale_2 = static_cast<double>(input_activation_state()->scale() *
+ weight_time()->scale() / output()->scale());
+
+ int32_t effective_scale_1_a;
+ int effective_scale_1_b;
+ int32_t effective_scale_2_a;
+ int effective_scale_2_b;
+
+ tflite::QuantizeMultiplier(effective_scale_1, &effective_scale_1_a, &effective_scale_1_b);
+ tflite::QuantizeMultiplier(effective_scale_2, &effective_scale_2_a, &effective_scale_2_b);
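+  // QuantizeMultiplier splits each effective scale into a Q31 fixed-point
+  // multiplier (*_a) and a power-of-two exponent (*_b), so that
+  // effective_scale ≈ multiplier * 2^(shift - 31).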
+
+ TfLiteSVDFParams params_svdf{};
+ params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
+ params_svdf.rank = params().svdf_rank;
+ params_svdf.activation = get_tflite_activation(params().activation);
+
+ auto scratchpad_activation_state = getOutputTensors()[1];
+  // Note: the activation_state input variable tensor is expected to be reset to
+  // zero; it is also expected that this variable tensor has no backing buffer.
+ auto scratchpad_data = getTensorData<int16_t>(scratchpad_activation_state);
+ std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0);
+
+ auto scratchpad = getOutputTensors()[2];
+ auto output_temp = getOutputTensors()[3];
+
+ int32_t input_zp = input()->zero_point();
+ int32_t output_zp = output()->zero_point();
+ luci_interpreter_pal::IntegerSVDF(
+ params_svdf, getTensorShape(input()), getTensorData<int8_t>(input()),
+ getTensorShape(weight_feature()), getTensorData<int8_t>(weight_feature()),
+ getTensorShape(weight_time()), getTensorData<int16_t>(weight_time()), getTensorShape(bias()),
+ getTensorData<int32_t>(bias()), scratchpad_data, getTensorShape(output()),
+ getTensorData<int8_t>(output()), getTensorData<int32_t>(scratchpad),
+ getTensorData<int32_t>(output_temp), effective_scale_1_a, effective_scale_1_b,
+ effective_scale_2_a, effective_scale_2_b, input_zp, output_zp);
+}
+
+void SVDF::evalFloat() const
+{
+ TfLiteSVDFParams params_svdf{};
+ params_svdf.asymmetric_quantize_inputs = params().asymmetric_quantize_inputs;
+ params_svdf.rank = params().svdf_rank;
+ params_svdf.activation = get_tflite_activation(params().activation);
+
+ auto scratchpad_activation_state = getOutputTensors()[1];
+  // Note: the activation_state input variable tensor is expected to be reset to
+  // zero; it is also expected that this variable tensor has no backing buffer.
+ auto scratchpad_data = getTensorData<float>(scratchpad_activation_state);
+ std::fill_n(scratchpad_data, scratchpad_activation_state->shape().num_elements(), 0);
+
+ auto scratchpad_1 = getOutputTensors()[2];
+
+ luci_interpreter_pal::FloatSVDF(
+ params_svdf, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(weight_feature()), getTensorData<float>(weight_feature()),
+ getTensorShape(weight_time()), getTensorData<float>(weight_time()), getTensorShape(bias()),
+ getTensorData<float>(bias()), getTensorData<float>(scratchpad_1), scratchpad_data,
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.h b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.h
new file mode 100644
index 000000000..335a6cd8f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SVDF_H
+#define LUCI_INTERPRETER_KERNELS_SVDF_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SVDF : public KernelWithParams<SVDFParams>
+{
+public:
+ SVDF(const Tensor *input, const Tensor *weight_feature, const Tensor *weight_time,
+ const Tensor *bias, const Tensor *input_activation_state, Tensor *output,
+ Tensor *scratchpad_activation_state, Tensor *scratchpad_1, Tensor *scratchpad_2,
+ Tensor *scratchpad_3, Tensor *scratchpad_4, Tensor *scratchpad_5, Tensor *scratchpad_6,
+ const SVDFParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *weight_feature() const { return _inputs[1]; }
+ const Tensor *weight_time() const { return _inputs[2]; }
+ const Tensor *bias() const { return _inputs[3]; }
+ const Tensor *input_activation_state() const { return _inputs[4]; }
+
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalInteger() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SVDF_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.test.cpp
new file mode 100644
index 000000000..82bd9b009
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SVDF.test.cpp
@@ -0,0 +1,341 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SVDF.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class SVDFTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(SVDFTest, FullIntegerTest)
+{
+ const int32_t batches = 2;
+ const int32_t input_size = 3;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, input_size};
+ Shape weight_feature_shape{num_filters, input_size};
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape bias_shape{units};
+ Shape activation_state_shape{batches, memory_size * num_filters};
+
+ std::vector<float> input_data{0.49837467, 0.19278903, 0.26584083,
+ 0.17660543, 0.52949083, -0.77931279};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ std::vector<float> bias_data{-0.0976817, 0.15294972, 0.39635518, -0.02702999};
+
+ std::pair<float, int32_t> input_quant_param = quantizationParams<int8_t>(-1, 1);
+ std::pair<float, int32_t> weight_feature_quant_param = quantizationParams<int8_t>(-0.5, 0.5);
+ std::pair<float, int32_t> weight_time_quant_param = quantizationParams<int16_t>(-1, 1);
+ std::pair<float, int32_t> bias_quant_param = quantizationParams<int32_t>(-512, 512);
+ std::pair<float, int32_t> activation_state_quant_param = quantizationParams<int16_t>(-16, 16);
+
+ std::pair<float, int32_t> output_quant_param = quantizationParams<int8_t>(-0.5, 0.5);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor weight_feature_tensor = makeInputTensor<DataType::S8>(
+ weight_feature_shape, weight_feature_quant_param.first, weight_feature_quant_param.second,
+ weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor = makeInputTensor<DataType::S16>(
+ weight_time_shape, weight_time_quant_param.first, weight_time_quant_param.second,
+ weight_time_data, _memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>(
+ bias_shape, bias_quant_param.first, bias_quant_param.second, bias_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(
+ DataType::S16, activation_state_quant_param.first, activation_state_quant_param.second);
+ activation_state_tensor.resize(activation_state_shape);
+ Tensor output_tensor =
+ makeOutputTensor(DataType::S8, output_quant_param.first, output_quant_param.second);
+
+ Tensor scratchpad_activation_state(DataType::S16, Shape({}), {}, "");
+ Tensor scratchpad_1(DataType::S32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::S32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::RELU;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, &bias_tensor,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad_activation_state);
+ _memory_manager->allocate_memory(scratchpad_1);
+ _memory_manager->allocate_memory(scratchpad_2);
+ _memory_manager->allocate_memory(scratchpad_3);
+ _memory_manager->allocate_memory(scratchpad_4);
+ _memory_manager->allocate_memory(scratchpad_5);
+ _memory_manager->allocate_memory(scratchpad_6);
+ kernel.execute();
+
+ std::vector<int8_t> ref_output_data{-9, 24, 31, 1, -10, 10, -3, 0};
+
+ std::vector<int32_t> ref_output_shape{batches, units};
+ EXPECT_THAT(extractTensorData<int8_t>(output_tensor), ref_output_data);
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(SVDFTest, FloatTest)
+{
+ const int32_t batches = 2;
+ const int32_t input_size = 3;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, input_size};
+ Shape weight_feature_shape{num_filters, input_size};
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape activation_state_shape{batches, memory_size * num_filters};
+
+ std::vector<float> input_data{0.12609188, -0.46347019, -0.89598465,
+ 0.35867718, 0.36897406, 0.73463392};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>(
+ weight_feature_shape, weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor =
+ makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+ activation_state_tensor.resize(activation_state_shape);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::NONE;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ _memory_manager->allocate_memory(scratchpad_activation_state);
+ _memory_manager->allocate_memory(scratchpad_1);
+ _memory_manager->allocate_memory(scratchpad_2);
+ _memory_manager->allocate_memory(scratchpad_3);
+ _memory_manager->allocate_memory(scratchpad_4);
+ _memory_manager->allocate_memory(scratchpad_5);
+ _memory_manager->allocate_memory(scratchpad_6);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.014899, -0.0517661, -0.143725, -0.00271883,
+ -0.03004015, 0.09565311, 0.1587342, 0.00784263};
+
+  std::vector<int32_t> ref_output_shape{batches, units};
+ const float tolerance = 1e-5;
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data, tolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(SVDFTest, Unsupported_Type_Configure_NEG)
+{
+ const int32_t batches = 2;
+ const int32_t input_size = 3;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, input_size};
+ Shape weight_feature_shape{num_filters, input_size};
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape activation_state_shape{batches, memory_size * num_filters};
+
+ std::vector<int32_t> input_data{0, 1, 3, 4, 4, -2};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S32>(input_shape, input_data, _memory_manager.get());
+ Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>(
+ weight_feature_shape, weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor =
+ makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+ activation_state_tensor.resize(activation_state_shape);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::NONE;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(SVDFTest, Invalid_Input_Shape_NEG)
+{
+ const int32_t batches = 2;
+ const int32_t right_input_size = 3;
+ const int32_t wrong_input_size = 4;
+ const int32_t units = 4;
+ const int32_t memory_size = 10;
+ const int32_t rank = 1;
+ const int32_t num_filters = units * rank;
+
+ Shape input_shape{batches, wrong_input_size};
+ Shape weight_feature_shape{num_filters, right_input_size};
+ Shape weight_time_shape{num_filters, memory_size};
+ Shape activation_state_shape{batches, memory_size * num_filters};
+
+ std::vector<float> input_data{0, 1, 3, 2, 4, 4, -2, 1};
+
+ std::vector<float> weight_feature_data{-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+ 0.22197971, 0.12416199, 0.27901134, 0.27557442,
+ 0.3905206, -0.36137494, -0.06634006, -0.10640851};
+
+ std::vector<float> weight_time_data{
+ -0.31930989, 0.37613347, 0.27901134, -0.36137494, -0.36118156,
+ 0.22197971, 0.27557442, -0.06634006, 0.0079667, 0.12416199,
+
+ 0.3905206, -0.10640851, -0.0976817, 0.15294972, 0.39635518,
+ -0.02702999, 0.39296314, 0.15785322, 0.21931258, 0.31053296,
+
+ -0.36916667, 0.38031587, -0.21580373, 0.27072677, 0.23622236,
+ 0.34936687, 0.18174365, 0.35907319, -0.17493086, 0.324846,
+
+ -0.10781813, 0.27201805, 0.14324132, -0.23681851, -0.27115166,
+ -0.01580888, -0.14943552, 0.15465137, 0.09784451, -0.0337657};
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor weight_feature_tensor = makeInputTensor<DataType::FLOAT32>(
+ weight_feature_shape, weight_feature_data, _memory_manager.get());
+ Tensor weight_time_tensor =
+ makeInputTensor<DataType::FLOAT32>(weight_time_shape, weight_time_data, _memory_manager.get());
+ Tensor activation_state_tensor = makeOutputTensor(DataType::FLOAT32);
+ activation_state_tensor.resize(activation_state_shape);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor scratchpad_activation_state(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_1(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_2(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_3(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_4(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_5(DataType::FLOAT32, Shape({}), {}, "");
+ Tensor scratchpad_6(DataType::FLOAT32, Shape({}), {}, "");
+
+ SVDFParams params{};
+ params.activation = Activation::NONE;
+ params.asymmetric_quantize_inputs = false;
+ params.svdf_rank = rank;
+
+ SVDF kernel(&input_tensor, &weight_feature_tensor, &weight_time_tensor, nullptr,
+ &activation_state_tensor, &output_tensor, &scratchpad_activation_state, &scratchpad_1,
+ &scratchpad_2, &scratchpad_3, &scratchpad_4, &scratchpad_5, &scratchpad_6, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Shape.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.cpp
new file mode 100644
index 000000000..0429fe1e5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Shape.h"
+#include "kernels/Utils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+ShapeKernel::ShapeKernel(const Tensor *input, Tensor *output, const ShapeParams &params)
+ : KernelWithParams<ShapeParams>({input}, {output}, params)
+{
+}
+
+void ShapeKernel::configure()
+{
+ LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::S32 or
+ output()->element_type() == DataType::S64);
+ const auto input_shape = input()->shape();
+
+ Shape output_shape(1);
+ output_shape.dim(0) = input_shape.num_dims();
+
+ output()->resize(output_shape);
+}
+
+void ShapeKernel::execute() const
+{
+ switch (params().out_type)
+ {
+ case DataType::S32:
+ evalInt<int32_t>();
+ break;
+ case DataType::S64:
+ evalInt<int64_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> void ShapeKernel::evalInt() const
+{
+ const auto input_shape = input()->shape();
+
+ auto output_data = getTensorData<T>(output());
+
+ for (int i = 0; i < input_shape.num_dims(); ++i)
+ {
+ output_data[i] = input_shape.dim(i);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Shape.h b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.h
new file mode 100644
index 000000000..cfaadec91
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SHAPE_H
+#define LUCI_INTERPRETER_KERNELS_SHAPE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ShapeKernel : public KernelWithParams<ShapeParams>
+{
+public:
+ ShapeKernel(const Tensor *input, Tensor *output, const ShapeParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void evalInt() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SHAPE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Shape.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.test.cpp
new file mode 100644
index 000000000..4763e016c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Shape.test.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Shape.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class ShapeTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+template <typename T> void runShapeKernel(loco::DataType dataType, IMemoryManager *memory_manager)
+{
+ Shape input_shape{1, 3, 1, 3, 5};
+
+ Tensor input_tensor = Tensor(loco::DataType::FLOAT32, input_shape, {}, "");
+ Tensor output_tensor = makeOutputTensor(dataType);
+
+ ShapeParams params{};
+ params.out_type = dataType;
+
+ ShapeKernel kernel(&input_tensor, &output_tensor, params);
+
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<T> ref_output_data{1, 3, 1, 3, 5};
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ref_output_data);
+
+ std::vector<int32_t> ref_output_shape{5};
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(ShapeTest, OutTypeInt)
+{
+
+ // Run for int32_t output
+ runShapeKernel<int32_t>(loco::DataType::S32, _memory_manager.get());
+ // Run for int64_t output
+ runShapeKernel<int64_t>(loco::DataType::S64, _memory_manager.get());
+
+ SUCCEED();
+}
+
+TEST_F(ShapeTest, Invalid_Output_Type_NEG)
+{
+ Shape input_shape{1, 3};
+
+ Tensor input_tensor = Tensor(loco::DataType::FLOAT32, input_shape, {}, "");
+ Tensor output_tensor = makeOutputTensor(loco::DataType::FLOAT32);
+
+ ShapeParams params{};
+ params.out_type = loco::DataType::FLOAT32;
+
+ ShapeKernel kernel(&input_tensor, &output_tensor, params);
+
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Slice.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.cpp
new file mode 100644
index 000000000..2fe2c5471
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.cpp
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Slice.h"
+#include "Utils.h"
+#include "PALSlice.h"
+
+#include <cassert>
+#include <cstring>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+const int max_dim = 4;
+
+Slice::Slice(const Tensor *input, const Tensor *begin, const Tensor *size, Tensor *output)
+ : Kernel({input, begin, size}, {output})
+{
+}
+
+template <typename T>
+Shape calculateOutputShape(const Tensor *input, const Tensor *begin, const Tensor *size)
+{
+ Shape output_shape = Shape(input->shape().num_dims());
+ for (int idx = 0; idx < input->shape().num_dims(); idx++)
+ {
+ T size_value = getTensorData<T>(size)[idx];
+ if (size_value < 0)
+ {
+ if (size_value != -1)
+ {
+ throw std::runtime_error("Invalid size.");
+ }
+ size_value = input->shape().dim(idx) - getTensorData<T>(begin)[idx];
+ }
+ else
+ {
+ if (input->shape().dim(idx) < getTensorData<T>(begin)[idx] + size_value)
+ {
+ throw std::runtime_error("Invalid begin and size.");
+ }
+ }
+ output_shape.dim(idx) = static_cast<int>(size_value);
+ }
+ return output_shape;
+}
+
+template <typename T>
+void getBeginAndSizeVectors(int dimensions, const Tensor *begin, const Tensor *size,
+ std::vector<int> *begins, std::vector<int> *sizes)
+{
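+  // Walk the dimensions in reverse so begins/sizes come out innermost-first;
+  // execute() restores the original order when it fills tflite::SliceParams.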
+ for (int idx = dimensions - 1; idx >= 0; --idx)
+ {
+ begins->push_back(getTensorData<T>(begin)[idx]);
+ sizes->push_back(getTensorData<T>(size)[idx]);
+ }
+}
+
+void Slice::configure()
+{
+ assert(input()->element_type() == output()->element_type());
+ assert(begin()->element_type() == DataType::S32 || begin()->element_type() == DataType::S64);
+ assert(size()->element_type() == DataType::S32 || size()->element_type() == DataType::S64);
+ assert(begin()->shape().num_dims() == 1);
+ assert(size()->shape().num_dims() == 1);
+ assert(input()->shape().num_dims() <= max_dim);
+
+ if (begin()->element_type() == DataType::S32)
+ {
+ output()->resize(calculateOutputShape<int32_t>(input(), begin(), size()));
+ }
+ else if (begin()->element_type() == DataType::S64)
+ {
+ output()->resize(calculateOutputShape<int64_t>(input(), begin(), size()));
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Slice::execute() const
+{
+ std::vector<int> begins;
+ begins.reserve(max_dim);
+ std::vector<int> sizes;
+ sizes.reserve(max_dim);
+ if (begin()->element_type() == DataType::S32)
+ {
+ getBeginAndSizeVectors<int32_t>(input()->shape().num_dims(), begin(), size(), &begins, &sizes);
+ }
+ else if (begin()->element_type() == DataType::S64)
+ {
+ getBeginAndSizeVectors<int64_t>(input()->shape().num_dims(), begin(), size(), &begins, &sizes);
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported begin type.");
+ }
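+  // Pad the reversed vectors out to the fixed 4-D rank; the extra dimensions
+  // are identity slices (begin 0, size 1).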
+ for (int i = input()->shape().num_dims(); i < max_dim; ++i)
+ {
+ begins.push_back(0);
+ sizes.push_back(1);
+ }
+
+ assert(begins.size() == 4);
+ assert(sizes.size() == 4);
+ tflite::SliceParams op_params{};
+ op_params.begin_count = 4;
+ op_params.size_count = 4;
+ for (int i = 0; i < 4; i++)
+ {
+ op_params.begin[i] = begins[3 - i];
+ op_params.size[i] = sizes[3 - i];
+ }
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ luci_interpreter_pal::Slice(op_params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ luci_interpreter_pal::Slice(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ case DataType::S8:
+ luci_interpreter_pal::Slice(op_params, getTensorShape(input()),
+ getTensorData<int8_t>(input()), getTensorShape(output()),
+ getTensorData<int8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported input type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
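
A worked example of the index bookkeeping above, using the SimpleTest data from Slice.test.cpp further down (a sketch of the arithmetic only, not additional API):

    // input shape {3, 2, 3, 1}, begin {1, 0, 0, 0}, size {2, 1, -1, 1}
    // getBeginAndSizeVectors iterates idx = 3..0, so:
    //   begins = {0, 0, 0, 1};  sizes = {1, -1, 1, 2};   // reversed order
    // The input is already 4-D, so the padding loop adds nothing.
    // The final copy restores the original order for the PAL call:
    //   op_params.begin = {1, 0, 0, 0};
    //   op_params.size  = {2, 1, -1, 1};   // -1 means "to the end of the dim"
    // calculateOutputShape resolves the -1 against dim 2 as 3 - 0 = 3,
    // giving an output shape of {2, 1, 3, 1}.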
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Slice.h b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.h
new file mode 100644
index 000000000..23c359608
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SLICE_H
+#define LUCI_INTERPRETER_KERNELS_SLICE_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Slice : public Kernel
+{
+public:
+ Slice(const Tensor *input, const Tensor *begin, const Tensor *size, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *begin() const { return _inputs[1]; }
+ const Tensor *size() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SLICE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Slice.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.test.cpp
new file mode 100644
index 000000000..517982990
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Slice.test.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Slice.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T> class SliceTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
+TYPED_TEST_SUITE(SliceTest, DataTypes);
+
+TYPED_TEST(SliceTest, SimpleTest)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::vector<TypeParam> input_data{1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6};
+ Shape input_shape{3, 2, 3, 1};
+ std::vector<int32_t> begin_data{1, 0, 0, 0};
+ Shape begin_shape{4};
+ std::vector<int32_t> size_data{2, 1, -1, 1};
+ Shape size_shape{4};
+ std::vector<TypeParam> output_data{3, 3, 3, 5, 5, 5};
+ std::vector<int32_t> output_shape{2, 1, 3, 1};
+
+ Tensor input_tensor =
+ makeInputTensor<getElementType<TypeParam>()>(input_shape, input_data, memory_manager.get());
+ Tensor begin_tensor =
+ makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get());
+ Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data, memory_manager.get());
+
+ Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
+
+ Slice kernel(&input_tensor, &begin_tensor, &size_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
+ ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.cpp
new file mode 100644
index 000000000..c230aaa70
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Softmax.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/softmax.h>
+#include "PALSoftmax.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Softmax::Softmax(const Tensor *input, Tensor *output, const SoftmaxParams &params)
+ : KernelWithParams<SoftmaxParams>({input}, {output}, params)
+{
+}
+
+void Softmax::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= 1);
+ if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S8)
+ {
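+    // The first check requires U8 outputs to have zero point 0; the second
+    // requires S8 outputs to sit at the lowest representable value, -128.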
+ LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::S8 || output()->zero_point() == 0);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::U8 ||
+ output()->zero_point() == std::numeric_limits<int8_t>::min());
+ tflite::SoftmaxParams op_params{};
+ op_params.table = _table;
+ luci_interpreter_pal::PopulateSoftmaxLookupTable(&op_params, input()->scale(), params().beta);
+ }
+ output()->resize(input()->shape());
+}
+
+void Softmax::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S8:
+ evalQuantized<int8_t>();
+ break;
+ case DataType::U8:
+ evalQuantized<uint8_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Softmax::evalFloat() const
+{
+ tflite::SoftmaxParams op_params{};
+ op_params.beta = params().beta;
+
+ tflite::reference_ops::Softmax(op_params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+template <typename T> void Softmax::evalQuantized() const
+{
+ tflite::SoftmaxParams op_params{};
+ op_params.table = const_cast<float *>(_table);
+ op_params.zero_point = output()->zero_point();
+ op_params.scale = output()->scale();
+ luci_interpreter_pal::InitializeParams(&op_params, input()->scale(), params().beta);
+ luci_interpreter_pal::Softmax(op_params, getTensorShape(input()), getTensorData<T>(input()),
+ getTensorShape(output()), getTensorData<T>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
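
The quantized path above leans on a 256-entry exponent lookup table that configure() fills once via PopulateSoftmaxLookupTable. A minimal sketch of the usual TFLite construction (illustrative only; build_table is not the PAL API):

    #include <cmath>
    #include <cstdint>
    #include <limits>

    // Cache exp(input_scale * beta * (q - q_max)) for every possible quantized
    // input value q, so execute() reduces to table lookups plus a single
    // normalization over the inner axis.
    void build_table(float table[256], float input_scale, float beta)
    {
      const int q_max = std::numeric_limits<std::uint8_t>::max(); // 255
      for (int q = 0; q <= q_max; ++q)
        table[q] = std::exp(input_scale * beta * static_cast<float>(q - q_max));
    }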
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.h b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.h
new file mode 100644
index 000000000..1f281df1c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SOFTMAX_H
+#define LUCI_INTERPRETER_KERNELS_SOFTMAX_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Softmax : public KernelWithParams<SoftmaxParams>
+{
+public:
+ Softmax(const Tensor *input, Tensor *output, const SoftmaxParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalQuantized() const;
+
+ float _table[256];
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SOFTMAX_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.test.cpp
new file mode 100644
index 000000000..08e70672d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Softmax.test.cpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Softmax.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T> constexpr loco::DataType toLocoDataType();
+
+template <> constexpr loco::DataType toLocoDataType<float>() { return loco::DataType::FLOAT32; }
+
+template <> constexpr loco::DataType toLocoDataType<uint8_t>() { return loco::DataType::U8; }
+
+template <> constexpr loco::DataType toLocoDataType<int8_t>() { return loco::DataType::S8; }
+
+template <typename T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<toLocoDataType<T>()>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(toLocoDataType<T>());
+
+ SoftmaxParams params{};
+ params.beta = 0.1;
+
+ Softmax kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::pair<float, int32_t> input_quant_param =
+ quantizationParams<T>(std::min<float>(std::min<float>(input_data), 0.f),
+ std::max<float>(std::max<float>(input_data), 0.f));
+ std::pair<float, int32_t> output_quant_param =
+ quantizationParams<T>(std::min<float>(std::min<float>(output_data), 0.f),
+ std::max<float>(std::max<float>(output_data), 0.f));
+ Tensor input_tensor = makeInputTensor<toLocoDataType<T>()>(input_shape, input_quant_param.first,
+ input_quant_param.second, input_data,
+ memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(toLocoDataType<T>(), output_quant_param.first, output_quant_param.second);
+
+ SoftmaxParams params{};
+ params.beta = 0.1;
+
+ Softmax kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, output_tensor.scale()));
+}
+
+template <typename T> class SoftmaxTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int8_t>;
+TYPED_TEST_SUITE(SoftmaxTest, DataTypes);
+
+TYPED_TEST(SoftmaxTest, Simple)
+{
+ Check<TypeParam>({2, 1, 2, 3}, {2, 1, 2, 3},
+ {
+ 5, -9, 8, //
+ -7, 2, -4, //
+ 1, -2, 9, //
+ 3, -6, -1, //
+ },
+ {
+ 0.38514, 0.09497, 0.51989, //
+ 0.20792, 0.51141, 0.28067, //
+ 0.25212, 0.18678, 0.56110, //
+ 0.48149, 0.19576, 0.32275, //
+ });
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp
new file mode 100644
index 000000000..630cd38c4
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.cpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SpaceToBatchND.h"
+#include "kernels/Utils.h"
+
+#include "PALSpaceToBatchND.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+const int kInputMinDimensionNum = 3;
+const int kInputMaxDimensionNum = 4;
+
+} // namespace
+
+SpaceToBatchND::SpaceToBatchND(const Tensor *input, const Tensor *block_shape,
+ const Tensor *paddings, Tensor *output)
+ : Kernel({input, block_shape, paddings}, {output})
+{
+}
+
+void SpaceToBatchND::configure()
+{
+ const auto *block_shape_data = block_shape()->data<int32_t>();
+ const auto *paddings_data = paddings()->data<int32_t>();
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= kInputMinDimensionNum);
+ LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= kInputMaxDimensionNum);
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+
+ int spatial_dims_num = input()->shape().num_dims() - 2;
+
+ LUCI_INTERPRETER_CHECK(block_shape()->shape().num_dims() == 1);
+ LUCI_INTERPRETER_CHECK(block_shape()->shape().dim(0) == spatial_dims_num);
+
+ LUCI_INTERPRETER_CHECK(paddings()->shape().num_dims() == 2);
+ LUCI_INTERPRETER_CHECK(paddings()->shape().dim(0) == spatial_dims_num);
+ LUCI_INTERPRETER_CHECK(paddings()->shape().dim(1) == 2);
+
+ Shape output_shape = Shape(input()->shape().num_dims());
+ int output_batch_size = input()->shape().dim(0);
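+  // Each padded spatial dimension must divide evenly by its block size; the
+  // batch dimension grows by the product of all block sizes.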
+ for (int i = 0; i < spatial_dims_num; ++i)
+ {
+ int final_dim_size =
+ (input()->shape().dim(i + 1) + paddings_data[i * 2] + paddings_data[i * 2 + 1]);
+ LUCI_INTERPRETER_CHECK(final_dim_size % block_shape_data[i] == 0);
+ output_shape.dim(i + 1) = final_dim_size / block_shape_data[i];
+ output_batch_size = output_batch_size * block_shape_data[i];
+ }
+ output_shape.dim(0) = output_batch_size;
+ output_shape.dim(input()->shape().num_dims() - 1) =
+ input()->shape().dim(input()->shape().num_dims() - 1);
+ output()->resize(output_shape);
+}
+
+void SpaceToBatchND::execute() const
+{
+  tflite::SpaceToBatchParams op_params{};
+  switch (input()->element_type())
+  {
+ case DataType::FLOAT32:
+ op_params.output_offset = 0;
+ luci_interpreter_pal::SpaceToBatchND(
+ op_params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()),
+ getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ op_params.output_offset = output()->zero_point();
+ luci_interpreter_pal::SpaceToBatchND(
+ op_params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(block_shape()), getTensorData<int32_t>(block_shape()),
+ getTensorShape(paddings()), getTensorData<int32_t>(paddings()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
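
A worked example of the shape arithmetic in configure(), using the Simple test data from SpaceToBatchND.test.cpp below (arithmetic sketch only):

    // input {1, 5, 2, 1}, block_shape {3, 2}, paddings {{1, 0}, {2, 0}}
    //   dim 1: (5 + 1 + 0) = 6, divisible by 3 -> output dim 6 / 3 = 2
    //   dim 2: (2 + 2 + 0) = 4, divisible by 2 -> output dim 4 / 2 = 2
    //   batch: 1 * 3 * 2 = 6; the channel dimension stays 1
    // -> output shape {6, 2, 2, 1}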
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.h b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.h
new file mode 100644
index 000000000..0893003bb
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H
+#define LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SpaceToBatchND : public Kernel
+{
+public:
+ SpaceToBatchND(const Tensor *input, const Tensor *block_shape, const Tensor *paddings,
+ Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *block_shape() const { return _inputs[1]; }
+ const Tensor *paddings() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SPACETOBATCHND_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp
new file mode 100644
index 000000000..3a8b0a812
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToBatchND.test.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SpaceToBatchND.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> block_shape_shape,
+ std::initializer_list<int32_t> paddings_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data,
+ std::initializer_list<int32_t> block_shape_data,
+ std::initializer_list<int32_t> paddings_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor block_shape_tensor =
+ makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>(paddings_shape, paddings_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+template <>
+void Check<uint8_t>(
+ std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> block_shape_shape,
+ std::initializer_list<int32_t> paddings_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<int32_t> block_shape_data,
+ std::initializer_list<int32_t> paddings_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::pair<float, int32_t> input_quant_param =
+ quantizationParams<uint8_t>(std::min(input_data), std::max(input_data));
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, input_quant_param.second,
+ input_data, memory_manager.get());
+ Tensor block_shape_tensor =
+ makeInputTensor<DataType::S32>(block_shape_shape, block_shape_data, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>(paddings_shape, paddings_data, memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(DataType::U8, input_quant_param.first, input_quant_param.second);
+
+ SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data, output_tensor.scale()));
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
+template <typename T> class SpaceToBatchNDTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(SpaceToBatchNDTest, DataTypes);
+
+TYPED_TEST(SpaceToBatchNDTest, Simple)
+{
+ Check<TypeParam>(/*input_shape=*/{1, 5, 2, 1}, /*block_shape_shape=*/{2},
+ /*paddings_shape=*/{2, 2},
+ /*output_shape=*/{6, 2, 2, 1},
+ /*input_data=*/{-1.0, 0.2, -0.3, 0.4, -0.5, 0.6, -0.7, 0.8, -0.9, 1.0},
+ /*block_shape_data=*/{3, 2}, /*paddings_data=*/{1, 0, 2, 0},
+ /*output_data=*/{0, 0, 0, -0.5, 0, 0, 0, 0.6, 0, -1.0, 0, -0.7,
+ 0, 0.2, 0, 0.8, 0, -0.3, 0, -0.9, 0, 0.4, 0, 1.0});
+}
+
+TEST(SpaceToBatchNDTest, Invalid_Shape_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(
+ {1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, memory_manager.get());
+ Tensor block_shape_tensor = makeInputTensor<DataType::S32>({2}, {2, 2}, memory_manager.get());
+ Tensor paddings_tensor =
+ makeInputTensor<DataType::S32>({2, 2}, {0, 0, 0, 0}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SpaceToBatchND kernel(&input_tensor, &block_shape_tensor, &paddings_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp
new file mode 100644
index 000000000..7c29e8cb0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SpaceToDepth.h"
+#include "Utils.h"
+#include "PALSpaceToDepth.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+SpaceToDepth::SpaceToDepth(const Tensor *input, Tensor *output, const SpaceToDepthParams &params)
+ : KernelWithParams<SpaceToDepthParams>({input}, {output}, params)
+{
+}
+
+void SpaceToDepth::configure()
+{
+ assert(input()->shape().num_dims() == 4);
+ assert(output()->element_type() == DataType::FLOAT32 ||
+ output()->element_type() == DataType::U8 || output()->element_type() == DataType::S8 ||
+ output()->element_type() == DataType::S32 || output()->element_type() == DataType::S64);
+ assert(input()->element_type() == output()->element_type());
+
+ const int block_size = params().block_size;
+ const int32_t input_height = input()->shape().dim(1);
+ const int32_t input_width = input()->shape().dim(2);
+ int32_t output_height = input_height / block_size;
+ int32_t output_width = input_width / block_size;
+
+ assert(input_height == output_height * block_size);
+ assert(input_width == output_width * block_size);
+
+ Shape output_shape(4);
+ output_shape.dim(0) = input()->shape().dim(0);
+ output_shape.dim(1) = output_height;
+ output_shape.dim(2) = output_width;
+ output_shape.dim(3) = input()->shape().dim(3) * block_size * block_size;
+
+ output()->resize(output_shape);
+}
+
+void SpaceToDepth::execute() const
+{
+ tflite::SpaceToDepthParams op_params{};
+ op_params.block_size = params().block_size;
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ luci_interpreter_pal::SpaceToDepth(op_params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ luci_interpreter_pal::SpaceToDepth(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
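
The shape arithmetic in configure(), applied to the SimpleCase test data from SpaceToDepth.test.cpp below (arithmetic sketch only):

    // input {1, 2, 2, 2}, block_size 2
    //   output height = 2 / 2 = 1, output width = 2 / 2 = 1
    //   output depth  = 2 * 2 * 2 = 8
    // -> output shape {1, 1, 1, 8}: each 2x2 spatial block folds into depth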
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.h b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.h
new file mode 100644
index 000000000..e66316b11
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SPACETODEPTH_H
+#define LUCI_INTERPRETER_KERNELS_SPACETODEPTH_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SpaceToDepth : public KernelWithParams<SpaceToDepthParams>
+{
+public:
+ SpaceToDepth(const Tensor *input, Tensor *output, const SpaceToDepthParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SPACETODEPTH_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.test.cpp
new file mode 100644
index 000000000..4af488618
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SpaceToDepth.test.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SpaceToDepth.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T> class SpaceToDepthTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(SpaceToDepthTest, DataTypes);
+
+TYPED_TEST(SpaceToDepthTest, SimpleCase)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ constexpr DataType element_type = getElementType<TypeParam>();
+ std::vector<TypeParam> input_data{1, 5, 6, 7, 2, 3, 4, 8};
+ Shape input_shape{1, 2, 2, 2};
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ std::vector<TypeParam> output_data{1, 5, 6, 7, 2, 3, 4, 8};
+ std::vector<int32_t> output_shape{1, 1, 1, 8};
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ SpaceToDepthParams params{};
+ params.block_size = 2;
+
+ SpaceToDepth kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
+ ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Split.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Split.cpp
new file mode 100644
index 000000000..1a563f307
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Split.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Split.h"
+
+#include "Utils.h"
+
+#include "PALSplit.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Split::Split(const Tensor *axis, const Tensor *input, std::vector<Tensor *> outputs)
+ : Kernel({axis, input}, std::move(outputs))
+{
+}
+
+void Split::configure()
+{
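+  // The axis comes from a scalar tensor; a negative value counts back from the last dimension.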
+ assert(axis()->shape().num_elements() == 1);
+ _axis_value = getTensorData<int32_t>(axis())[0];
+ if (_axis_value < 0)
+ _axis_value += input()->shape().num_dims();
+ assert(_axis_value >= 0 && _axis_value < input()->shape().num_dims());
+
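+  // Even split: the extent along the split axis must divide evenly among the outputs,
+  // so every output tensor gets the same shape.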
+ const int32_t input_size = input()->shape().dim(_axis_value);
+ assert(input_size % _outputs.size() == 0);
+ const int32_t slice_size = input_size / _outputs.size();
+
+ Shape output_shape = input()->shape();
+ output_shape.dim(_axis_value) = slice_size;
+ for (Tensor *output : _outputs)
+ {
+ output->resize(output_shape);
+ }
+}
+
+void Split::execute() const
+{
+ tflite::SplitParams params{};
+ params.num_split = _outputs.size();
+ params.axis = _axis_value;
+
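+  // VectorOfTensors collects the raw shapes and data pointers of all outputs in the form the
+  // PAL routine expects; the macro instantiates the call once per supported element type.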
+#define TF_LITE_SPLIT(scalar) \
+ { \
+ VectorOfTensors<scalar, false> all_outputs(_outputs); \
+ luci_interpreter_pal::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \
+ all_outputs.shapes(), all_outputs.data()); \
+ }
+
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ TF_LITE_SPLIT(float);
+ break;
+ case DataType::U8:
+ TF_LITE_SPLIT(uint8_t);
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+#undef TF_LITE_SPLIT
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Split.h b/compiler/luci-micro/luci-interpreter/src/kernels/Split.h
new file mode 100644
index 000000000..9542b1e56
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Split.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SPLIT_H
+#define LUCI_INTERPRETER_KERNELS_SPLIT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Split : public Kernel
+{
+public:
+ Split(const Tensor *axis, const Tensor *input, std::vector<Tensor *> outputs);
+
+ const Tensor *axis() const { return _inputs[0]; }
+ const Tensor *input() const { return _inputs[1]; }
+ Tensor *output(int index) const { return _outputs[index]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ int32_t _axis_value{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SPLIT_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Split.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Split.test.cpp
new file mode 100644
index 000000000..283cd9aa9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Split.test.cpp
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Split.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(int axis, int num_splits, std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data,
+ std::vector<std::vector<T>> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ constexpr DataType element_type = getElementType<T>();
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({}, {axis}, memory_manager.get());
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+
+ std::vector<Tensor> output_tensors;
+ output_tensors.reserve(num_splits);
+ for (int i = 0; i < num_splits; ++i)
+ {
+ output_tensors.emplace_back(makeOutputTensor(element_type));
+ }
+
+ std::vector<Tensor *> output_tensor_ptrs(num_splits);
+ for (int i = 0; i < num_splits; ++i)
+ {
+ output_tensor_ptrs[i] = &output_tensors[i];
+ }
+
+ Split kernel(&axis_tensor, &input_tensor, std::move(output_tensor_ptrs));
+ kernel.configure();
+ for (int i = 0; i < num_splits; ++i)
+ {
+ memory_manager->allocate_memory(output_tensors[i]);
+ }
+ kernel.execute();
+
+ for (int i = 0; i < num_splits; ++i)
+ {
+    EXPECT_THAT(extractTensorData<T>(output_tensors[i]),
+                ::testing::ElementsAreArray(output_data[i]));
+    EXPECT_THAT(extractTensorShape(output_tensors[i]),
+                ::testing::ElementsAreArray(output_shape));
+ }
+}
+
+template <typename T> class SplitTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(SplitTest, DataTypes);
+
+TYPED_TEST(SplitTest, FourDimensional)
+{
+ Check<TypeParam>(/*axis=*/0, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 2, 3, 4, 5, 6, 7, 8}, //
+ {9, 10, 11, 12, 13, 14, 15, 16}, //
+ });
+ Check<TypeParam>(
+ /*axis=*/1, /*num_splits=*/2, {2, 2, 2, 2}, {2, 1, 2, 2},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 2, 3, 4, 9, 10, 11, 12}, //
+ {5, 6, 7, 8, 13, 14, 15, 16}, //
+ });
+ Check<TypeParam>(
+ /*axis=*/2, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 1, 2},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 2, 5, 6, 9, 10, 13, 14}, //
+ {3, 4, 7, 8, 11, 12, 15, 16}, //
+ });
+ Check<TypeParam>(
+ /*axis=*/3, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 2, 1},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 3, 5, 7, 9, 11, 13, 15}, //
+ {2, 4, 6, 8, 10, 12, 14, 16}, //
+ });
+}
+
+TYPED_TEST(SplitTest, OneDimensional)
+{
+ Check<TypeParam>(
+ /*axis=*/0, /*num_splits=*/8, {8}, {1}, {1, 2, 3, 4, 5, 6, 7, 8},
+ {{1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}});
+}
+
+TYPED_TEST(SplitTest, NegativeAxis)
+{
+ Check<TypeParam>(
+ /*axis=*/-4, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ {
+ {1, 2, 3, 4, 5, 6, 7, 8}, //
+ {9, 10, 11, 12, 13, 14, 15, 16},
+ });
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.cpp
new file mode 100644
index 000000000..aa6820889
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.cpp
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SplitV.h"
+
+#include "Utils.h"
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+SplitV::SplitV(const Tensor *input, const Tensor *size_splits, const Tensor *axis,
+ std::vector<Tensor *> outputs)
+ : Kernel({input, size_splits, axis}, std::move(outputs))
+{
+}
+
+void SplitV::configure()
+{
+ assert(axis()->shape().num_elements() == 1);
+ _axis_value = getTensorData<int32_t>(axis())[0];
+ if (_axis_value < 0)
+ _axis_value += input()->shape().num_dims();
+ assert(_axis_value >= 0 && _axis_value < input()->shape().num_dims());
+
+ auto num_split = static_cast<int32_t>(_outputs.size());
+ auto sizes_data = getTensorData<int32_t>(size_splits());
+
+ assert(size_splits()->shape().num_dims() == 1);
+
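+  // size_splits may contain at most one -1 entry, meaning "take whatever remains along the
+  // axis". Accumulate the explicit sizes so that remainder can be computed below.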
+ int32_t sum = 0;
+  const auto num_dims_size_splits = size_splits()->shape().dim(0);
+  int32_t count_neg_dim = 0;
+
+  for (int32_t i = 0; i < num_dims_size_splits; ++i)
+ {
+ if (sizes_data[i] != -1)
+ {
+ sum += sizes_data[i];
+ }
+ else
+ {
+ count_neg_dim++;
+ }
+ }
+ assert(count_neg_dim < 2);
+ assert(size_splits()->shape().num_elements() == num_split);
+
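+  // Resize each output: explicit entries give the split-axis extent directly, while the
+  // -1 entry receives the remainder left over after the explicit sizes.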
+ auto output_shape = input()->shape();
+ for (int32_t i = 0; i < num_split; ++i)
+ {
+ if (sizes_data[i] == -1)
+ {
+ output_shape.dim(_axis_value) = input()->shape().dim(_axis_value) - sum;
+ }
+ else
+ {
+ output_shape.dim(_axis_value) = sizes_data[i];
+ }
+ _outputs[i]->resize(output_shape);
+ }
+}
+
+void SplitV::execute() const
+{
+ tflite::SplitParams params{};
+ params.num_split = _outputs.size();
+ params.axis = _axis_value;
+
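+  // Same dispatch pattern as Split: gather the output shapes and data pointers via
+  // VectorOfTensors and invoke the typed TFLite Split implementation.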
+#define TF_LITE_SPLIT(scalar) \
+ { \
+ VectorOfTensors<scalar, false> all_outputs(_outputs); \
+ tflite::optimized_ops::Split(params, getTensorShape(input()), getTensorData<scalar>(input()), \
+ all_outputs.shapes(), all_outputs.data()); \
+ }
+
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ TF_LITE_SPLIT(float);
+ break;
+ case DataType::U8:
+ TF_LITE_SPLIT(uint8_t);
+ break;
+ case DataType::S16:
+ TF_LITE_SPLIT(int16_t);
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+#undef TF_LITE_SPLIT
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.h b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.h
new file mode 100644
index 000000000..92f6288fb
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SPLIT_V_H
+#define LUCI_INTERPRETER_KERNELS_SPLIT_V_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SplitV : public Kernel
+{
+public:
+ SplitV(const Tensor *input, const Tensor *size_splits, const Tensor *axis,
+ std::vector<Tensor *> outputs);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *size_splits() const { return _inputs[1]; }
+ const Tensor *axis() const { return _inputs[2]; }
+ Tensor *output(int index) const { return _outputs[index]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ int32_t _axis_value{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SPLIT_V_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.test.cpp
new file mode 100644
index 000000000..035bc2122
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SplitV.test.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SplitV.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(int axis, std::initializer_list<int32_t> splits_size,
+ std::initializer_list<int32_t> input_shape, std::initializer_list<T> input_data,
+ std::vector<std::vector<T>> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+
+ auto num_splits = static_cast<int32_t>(splits_size.size());
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor sizes_tensor =
+ makeInputTensor<DataType::S32>({num_splits}, splits_size, memory_manager.get());
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({}, {axis}, memory_manager.get());
+
+ std::vector<Tensor> output_tensors;
+ output_tensors.reserve(num_splits);
+ for (int i = 0; i < num_splits; ++i)
+ {
+ output_tensors.emplace_back(makeOutputTensor(element_type));
+ }
+
+ std::vector<Tensor *> output_tensor_ptrs(num_splits);
+ for (int i = 0; i < num_splits; ++i)
+ {
+ output_tensor_ptrs[i] = &output_tensors[i];
+ }
+
+ SplitV kernel(&input_tensor, &sizes_tensor, &axis_tensor, std::move(output_tensor_ptrs));
+ kernel.configure();
+ for (int i = 0; i < num_splits; ++i)
+ {
+ memory_manager->allocate_memory(output_tensors[i]);
+ }
+ kernel.execute();
+
+ for (int i = 0; i < num_splits; ++i)
+ {
+    EXPECT_THAT(extractTensorData<T>(output_tensors[i]),
+                ::testing::ElementsAreArray(output_data[i]));
+ }
+}
+
+template <typename T> class SplitVTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t, int16_t>;
+TYPED_TEST_SUITE(SplitVTest, DataTypes);
+
+TYPED_TEST(SplitVTest, ThreeDimensional)
+{
+ Check<TypeParam>(
+ /*axis=*/0, /*splits_size=*/{1, 2}, {3, 3, 3},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
+ {
+ {1, 2, 3, 4, 5, 6, 7, 8, 9}, //
+ {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27} //
+ });
+ Check<TypeParam>(
+ /*axis=*/1, /*splits_size=*/{1, 2}, {3, 3, 3},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
+ {
+ {1, 2, 3, 10, 11, 12, 19, 20, 21}, //
+ {4, 5, 6, 7, 8, 9, 13, 14, 15, 16, 17, 18, 22, 23, 24, 25, 26, 27} //
+ });
+ Check<TypeParam>(
+ /*axis=*/2, /*splits_size=*/{1, 2}, {3, 3, 3},
+ {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27},
+ {
+ {1, 4, 7, 10, 13, 16, 19, 22, 25}, //
+ {2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23, 24, 26, 27} //
+ });
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.cpp
new file mode 100644
index 000000000..46e9fc9ad
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sqrt.h"
+#include "kernels/Utils.h"
+
+#include <stdexcept>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Sqrt::Sqrt(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Sqrt::configure()
+{
+ if (input()->element_type() != output()->element_type())
+ {
+ throw std::runtime_error("Input/output tensor data type mismatch.");
+ }
+ output()->resize(input()->shape());
+}
+
+void Sqrt::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Sqrt::evalFloat() const
+{
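+  // Element-wise sqrt over the flattened input buffer, written sequentially to the output.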
+ auto in = getTensorData<float>(input());
+ auto out = getTensorData<float>(output());
+ auto size = getTensorShape(input()).FlatSize();
+ for (auto i = in; i != in + size; ++i)
+ {
+ *out = std::sqrt(*i);
+ ++out;
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.h b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.h
new file mode 100644
index 000000000..4034655ed
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SQRT_H
+#define LUCI_INTERPRETER_KERNELS_SQRT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Sqrt : public Kernel
+{
+public:
+ Sqrt(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SQRT_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.test.cpp
new file mode 100644
index 000000000..96835fbfc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sqrt.test.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sqrt.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Sqrt kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(SqrtTest, SimpleSqrt)
+{
+ Check(
+ /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+ /*input_data=*/
+ {
+ 0, 8, 2, 4, //
+ 3, 7, 10, 0.3, //
+ },
+ /*output_data=*/
+ {
+ 0.0, 2.8284271, 1.4142136, 2, //
+ 1.7320508, 2.6457513, 3.1622777, 0.54772256, //
+ });
+}
+
+TEST(SqrtTest, Input_Output_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ Sqrt kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST(SqrtTest, Invalid_Input_Type_NEG)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S64);
+
+ Sqrt kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Square.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Square.cpp
new file mode 100644
index 000000000..bc71905c1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Square.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Square.h"
+#include "kernels/Utils.h"
+
+#include <stdexcept>
+#include <cmath>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Square::Square(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Square::configure()
+{
+ if (input()->element_type() != output()->element_type())
+ {
+ throw std::runtime_error("Input/output tensor data type mismatch.");
+ }
+ output()->resize(input()->shape());
+}
+
+void Square::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Square::evalFloat() const
+{
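+  // Element-wise square over the flattened input buffer.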
+ auto in = getTensorData<float>(input());
+ auto out = getTensorData<float>(output());
+ auto size = getTensorShape(input()).FlatSize();
+ for (auto i = in; i != in + size; ++i)
+ {
+ *out = (*i) * (*i);
+ ++out;
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Square.h b/compiler/luci-micro/luci-interpreter/src/kernels/Square.h
new file mode 100644
index 000000000..73ed5a707
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Square.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SQUARE_H
+#define LUCI_INTERPRETER_KERNELS_SQUARE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Square : public Kernel
+{
+public:
+ Square(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SQUARE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Square.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Square.test.cpp
new file mode 100644
index 000000000..51662dea7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Square.test.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Square.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(SquareTest, Float)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Shape input_shape{3, 1, 2};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Square kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{1.0, 0.0, 1.0, 121.0, 4.0, 2.0736};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.cpp
new file mode 100644
index 000000000..3bafeba4a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SquaredDifference.h"
+
+#include "kernels/Utils.h"
+
+#include "kernels/BinaryOpCommon.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+SquaredDifference::SquaredDifference(const Tensor *input1, const Tensor *input2, Tensor *output)
+ : Kernel({input1, input2}, {output})
+{
+}
+
+void SquaredDifference::configure()
+{
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type())
+ LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type())
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void SquaredDifference::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalSquaredDifference<float>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+template <typename T> inline void SquaredDifference::evalSquaredDifference() const
+{
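+  // BinaryOpBroadcastSlow handles any shape broadcasting and applies the lambda
+  // (the squared difference) element-wise.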
+ BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()), [](T x, T y) {
+ const T difference = x - y;
+ return difference * difference;
+ });
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.h b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.h
new file mode 100644
index 000000000..9327caf93
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H
+#define LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class SquaredDifference : public Kernel
+{
+public:
+ SquaredDifference(const Tensor *input1, const Tensor *input2, Tensor *output);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> inline void evalSquaredDifference() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SQUAREDDIFFERENCE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.test.cpp
new file mode 100644
index 000000000..2819c01e2
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/SquaredDifference.test.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SquaredDifference.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(SquaredDifferenceTest, Float)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Shape input_shape{3, 1, 2};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data1, memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data2, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SquaredDifference kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{4.0, 0.0, 4.0, 1.0, 1.0, 0.0001};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(SquaredDifferenceTest, FloatBroadcast)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Shape input_shape1{3, 1, 2};
+ Shape input_shape2{1};
+ std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44};
+ std::vector<float> input_data2{1.0};
+ Tensor input_tensor1 =
+ makeInputTensor<DataType::FLOAT32>(input_shape1, input_data1, memory_manager.get());
+ Tensor input_tensor2 =
+ makeInputTensor<DataType::FLOAT32>(input_shape2, input_data2, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SquaredDifference kernel(&input_tensor1, &input_tensor2, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.0, 1.0, 4.0, 100.0, 9.0, 5.9536};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.cpp
new file mode 100644
index 000000000..4a75518c7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Squeeze.h"
+
+#include "kernels/Utils.h"
+
+#include <cstring>
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Squeeze::Squeeze(const Tensor *input, Tensor *output, const SqueezeParams &params)
+ : KernelWithParams<SqueezeParams>({input}, {output}, params)
+{
+}
+
+void Squeeze::configure()
+{
+ int input_num_dims = input()->shape().num_dims();
+ int num_squeeze_dims = params().squeeze_dims.size();
+ assert(input_num_dims <= 8);
+ bool should_squeeze[8] = {false};
+ int num_squeezed_dims = 0;
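+  // With no explicit squeeze dims, drop every dimension of extent 1; otherwise drop exactly
+  // the requested (negative-normalized) dims, each of which must have extent 1.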
+ if (num_squeeze_dims == 0)
+ {
+ for (int idx = 0; idx < input_num_dims; ++idx)
+ {
+ if (input()->shape().dim(idx) == 1)
+ {
+ should_squeeze[idx] = true;
+ ++num_squeezed_dims;
+ }
+ }
+ }
+ else
+ {
+ for (int idx = 0; idx < num_squeeze_dims; ++idx)
+ {
+ int current = params().squeeze_dims[idx] < 0 ? params().squeeze_dims[idx] + input_num_dims
+ : params().squeeze_dims[idx];
+ assert(current >= 0 && current < input_num_dims && input()->shape().dim(current) == 1);
+ if (!should_squeeze[current])
+ ++num_squeezed_dims;
+ should_squeeze[current] = true;
+ }
+ }
+ Shape output_shape(input_num_dims - num_squeezed_dims);
+ for (int in_idx = 0, out_idx = 0; in_idx < input_num_dims; ++in_idx)
+ {
+ if (!should_squeeze[in_idx])
+ {
+ output_shape.dim(out_idx++) = input()->shape().dim(in_idx);
+ }
+ }
+ output()->resize(output_shape);
+}
+
+void Squeeze::execute() const
+{
+ assert(input()->shape().num_elements() == output()->shape().num_elements());
+
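+  // Squeeze only reinterprets the shape; the element data is copied through unchanged.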
+ const auto *input_data = input()->data<void>();
+ auto *output_data = output()->data<void>();
+ std::memcpy(output_data, input_data,
+ getDataTypeSize(input()->element_type()) * input()->shape().num_elements());
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.h b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.h
new file mode 100644
index 000000000..687af5158
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SQUEEZE_H
+#define LUCI_INTERPRETER_KERNELS_SQUEEZE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Squeeze : public KernelWithParams<SqueezeParams>
+{
+public:
+ Squeeze(const Tensor *input, Tensor *output, const SqueezeParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SQUEEZE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.test.cpp
new file mode 100644
index 000000000..1bc0b6459
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Squeeze.test.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Squeeze.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<T> input_data, std::initializer_list<T> output_data,
+ std::initializer_list<int32_t> squeeze_dims)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ SqueezeParams params{};
+ params.squeeze_dims = squeeze_dims;
+
+ Squeeze kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+template <typename T> class SqueezeTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(SqueezeTest, DataTypes);
+
+TYPED_TEST(SqueezeTest, TotalTest)
+{
+ Check<TypeParam>(
+ /*input_shape=*/{1, 24, 1}, /*output_shape=*/{24},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
+ /*output_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
+ {-1, 0});
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.cpp
new file mode 100644
index 000000000..a8730d861
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.cpp
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/StridedSlice.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/strided_slice.h>
+
+#include <cmath>
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+StridedSlice::StridedSlice(const Tensor *input, const Tensor *begin, const Tensor *end,
+ const Tensor *strides, Tensor *output, const StridedSliceParams &params)
+ : KernelWithParams<StridedSliceParams>({input, begin, end, strides}, {output}, params)
+{
+}
+
+void StridedSlice::configure()
+{
+ assert(begin()->shape().num_dims() == 1);
+ assert(end()->shape().num_dims() == 1);
+ assert(strides()->shape().num_dims() == 1);
+ assert(input()->element_type() == output()->element_type());
+ assert(begin()->element_type() == DataType::S32);
+ assert(end()->element_type() == DataType::S32);
+ assert(strides()->element_type() == DataType::S32);
+ assert(input()->shape().num_dims() <= 4);
+ if (params().ellipsis_mask != 0)
+ {
+ throw std::runtime_error("ellipsis_mask is not implemented yet.");
+ }
+ if (params().new_axis_mask != 0)
+ {
+ throw std::runtime_error("new_axis_mask is not implemented yet.");
+ }
+ if (input()->element_type() == DataType::U8)
+ {
+ assert(input()->scale() == output()->scale());
+ assert(input()->zero_point() == output()->zero_point());
+ }
+ tflite::StridedSliceParams op_params{};
+ op_params.start_indices_count = input()->shape().num_dims();
+ op_params.stop_indices_count = input()->shape().num_dims();
+ op_params.strides_count = input()->shape().num_dims();
+
+ for (int i = 0; i < input()->shape().num_dims(); i++)
+ {
+ op_params.start_indices[i] = getTensorData<int32_t>(begin())[i];
+ op_params.stop_indices[i] = getTensorData<int32_t>(end())[i];
+ op_params.strides[i] = getTensorData<int32_t>(strides())[i];
+ }
+ op_params.begin_mask = params().begin_mask;
+ op_params.ellipsis_mask = 0;
+ op_params.end_mask = params().end_mask;
+ op_params.new_axis_mask = 0;
+ op_params.shrink_axis_mask = params().shrink_axis_mask;
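+  // Compute the output shape axis by axis (walking in reverse): each slice contributes
+  // ceil((end - begin) / stride) elements, and axes selected by shrink_axis_mask are
+  // dropped from the output shape entirely.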
+ std::vector<int32_t> output_shape_vector;
+ for (int i = 0; i < input()->shape().num_dims(); i++)
+ {
+ int idx = input()->shape().num_dims() - i - 1;
+ int32_t stride = getTensorData<int32_t>(strides())[idx];
+ assert(stride != 0);
+ int32_t begin = ::tflite::strided_slice::StartForAxis(op_params, getTensorShape(input()), idx);
+ int32_t end =
+ ::tflite::strided_slice::StopForAxis(op_params, getTensorShape(input()), idx, begin);
+
+ const bool shrink_axis = params().shrink_axis_mask & (1 << idx);
+ if (shrink_axis)
+ {
+ end = begin + 1;
+ }
+
+ int32_t dim_shape = std::ceil((end - begin) / static_cast<float>(stride));
+ dim_shape = dim_shape < 0 ? 0 : dim_shape;
+ if (!shrink_axis)
+ {
+ output_shape_vector.push_back(dim_shape);
+ }
+ }
+ Shape output_shape = Shape(output_shape_vector.size());
+ for (size_t i = 0; i < output_shape_vector.size(); i++)
+ {
+ output_shape.dim(i) = output_shape_vector[output_shape_vector.size() - i - 1];
+ }
+ output()->resize(output_shape);
+}
+
+void StridedSlice::execute() const
+{
+ tflite::StridedSliceParams op_params{};
+ op_params.start_indices_count = input()->shape().num_dims();
+ op_params.stop_indices_count = input()->shape().num_dims();
+ op_params.strides_count = input()->shape().num_dims();
+
+ for (int i = 0; i < input()->shape().num_dims(); i++)
+ {
+ op_params.start_indices[i] = getTensorData<int32_t>(begin())[i];
+ op_params.stop_indices[i] = getTensorData<int32_t>(end())[i];
+ op_params.strides[i] = getTensorData<int32_t>(strides())[i];
+ }
+ op_params.begin_mask = params().begin_mask;
+ op_params.ellipsis_mask = 0;
+ op_params.end_mask = params().end_mask;
+ op_params.new_axis_mask = 0;
+ op_params.shrink_axis_mask = params().shrink_axis_mask;
+
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ case DataType::S32:
+ tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()),
+ getTensorData<int32_t>(input()), getTensorShape(output()),
+ getTensorData<int32_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.h b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.h
new file mode 100644
index 000000000..fc96893a7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_STRIDEDSLICE_H
+#define LUCI_INTERPRETER_KERNELS_STRIDEDSLICE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class StridedSlice : public KernelWithParams<StridedSliceParams>
+{
+public:
+ StridedSlice(const Tensor *input, const Tensor *begin, const Tensor *end, const Tensor *strides,
+ Tensor *output, const StridedSliceParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *begin() const { return _inputs[1]; }
+ const Tensor *end() const { return _inputs[2]; }
+ const Tensor *strides() const { return _inputs[3]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_STRIDEDSLICE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.test.cpp
new file mode 100644
index 000000000..399cdebed
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/StridedSlice.test.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/StridedSlice.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(StridedSliceTest, Float)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Shape input_shape{2, 3, 2};
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ Shape begin_shape{3};
+ std::vector<int32_t> begin_data{0, 0, 0};
+ Shape end_shape{3};
+ std::vector<int32_t> end_data{1, 3, 2};
+ Shape strides_shape{3};
+ std::vector<int32_t> strides_data{1, 1, 1};
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, memory_manager.get());
+ Tensor begin_tensor =
+ makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get());
+ Tensor end_tensor = makeInputTensor<DataType::S32>(end_shape, end_data, memory_manager.get());
+ Tensor strides_tensor =
+ makeInputTensor<DataType::S32>(strides_shape, strides_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ StridedSliceParams params{};
+ params.begin_mask = 0;
+ params.end_mask = 0;
+ params.ellipsis_mask = 0;
+ params.new_axis_mask = 0;
+ params.shrink_axis_mask = 1;
+
+ StridedSlice kernel(&input_tensor, &begin_tensor, &end_tensor, &strides_tensor, &output_tensor,
+ params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<int32_t> output_shape{3, 2};
+ std::vector<float> output_data{1, 2, 3, 4, 5, 6};
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(StridedSliceTest, Uint8)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ Shape input_shape{2, 3, 2};
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ Shape begin_shape{3};
+ std::vector<int32_t> begin_data{0, 0, 0};
+ Shape end_shape{3};
+ std::vector<int32_t> end_data{1, 3, 2};
+ Shape strides_shape{3};
+ std::vector<int32_t> strides_data{1, 1, 1};
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>(input_shape, 1.0f, 0, input_data, memory_manager.get());
+ Tensor begin_tensor =
+ makeInputTensor<DataType::S32>(begin_shape, begin_data, memory_manager.get());
+ Tensor end_tensor = makeInputTensor<DataType::S32>(end_shape, end_data, memory_manager.get());
+ Tensor strides_tensor =
+ makeInputTensor<DataType::S32>(strides_shape, strides_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1.0f, 0);
+
+ StridedSliceParams params{};
+ params.begin_mask = 0;
+ params.end_mask = 0;
+ params.ellipsis_mask = 0;
+ params.new_axis_mask = 0;
+ params.shrink_axis_mask = 1;
+
+ StridedSlice kernel(&input_tensor, &begin_tensor, &end_tensor, &strides_tensor, &output_tensor,
+ params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<int32_t> output_shape{3, 2};
+ std::vector<float> output_data{1, 2, 3, 4, 5, 6};
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sub.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.cpp
new file mode 100644
index 000000000..24b6a72e5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.cpp
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sub.h"
+#include "kernels/Utils.h"
+
+#include "PALSub.h"
+
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Sub::Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubParams &params)
+ : KernelWithParams<SubParams>({input1, input2}, {output}, params)
+{
+}
+
+void Sub::configure()
+{
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type())
+  LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type())
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Sub::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::S64:
+ evalInteger<int64_t>();
+ break;
+ case DataType::S32:
+ evalInteger<int32_t>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Sub::evalFloat() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<float>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastSubSlow(
+ params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+ getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+ }
+ else
+ {
+ luci_interpreter_pal::Sub(params, getTensorShape(input1()), getTensorData<float>(input1()),
+ getTensorShape(input2()), getTensorData<float>(input2()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ }
+}
+
+template <typename T> void Sub::evalInteger() const
+{
+ tflite::ArithmeticParams params{};
+ fillArithmeticActivationRange<T>(params, _params.activation);
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastSubSlow(
+ params, getTensorShape(input1()), getTensorData<T>(input1()), getTensorShape(input2()),
+ getTensorData<T>(input2()), getTensorShape(output()), getTensorData<T>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<T>(input1()),
+ getTensorShape(input2()), getTensorData<T>(input2()),
+ getTensorShape(output()), getTensorData<T>(output()));
+ }
+}
+
+void Sub::evalQuantized() const
+{
+ const auto input1_scale = static_cast<double>(input1()->scale());
+ const auto input2_scale = static_cast<double>(input2()->scale());
+ const auto output_scale = static_cast<double>(output()->scale());
+
+ const int left_shift = 20;
+ const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
+ const double real_input1_multiplier = input1_scale / twice_max_input_scale;
+ const double real_input2_multiplier = input2_scale / twice_max_input_scale;
+ const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);
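+  // Rescaling sketch: since q = zero_point + real / scale, the real result
+  //   s1 * (q1 - z1) - s2 * (q2 - z2)
+  // is evaluated in a common domain scaled by twice_max_input_scale, with the
+  // operands pre-shifted left by `left_shift` bits for precision; the output
+  // multiplier folds the (1 << left_shift) factor back out while converting
+  // to the output scale.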
+
+ int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
+ int input1_shift{}, input2_shift{}, output_shift{};
+ quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
+ quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
+ quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::ArithmeticParams params{};
+ params.left_shift = left_shift;
+ // The kernel expects inputs' zero points to be negated.
+ params.input1_offset = -input1()->zero_point(); // Note the '-'.
+ params.input1_multiplier = input1_multiplier;
+ params.input1_shift = input1_shift;
+ params.input2_offset = -input2()->zero_point(); // Note the '-'.
+ params.input2_multiplier = input2_multiplier;
+ params.input2_shift = input2_shift;
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = output_multiplier;
+ params.output_shift = output_shift;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastSubSlow(
+ params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), getTensorShape(input2()),
+ getTensorData<uint8_t>(input2()), getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
+ getTensorShape(input2()), getTensorData<uint8_t>(input2()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sub.h b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.h
new file mode 100644
index 000000000..23952b3bd
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SUB_H
+#define LUCI_INTERPRETER_KERNELS_SUB_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Sub : public KernelWithParams<SubParams>
+{
+public:
+ Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubParams &params);
+
+ const Tensor *input1() const { return _inputs[0]; }
+ const Tensor *input2() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ template <typename T> void evalInteger() const;
+ void evalQuantized() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SUB_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Sub.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.test.cpp
new file mode 100644
index 000000000..9abafd49a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Sub.test.cpp
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Sub.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+#include <algorithm>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+using std::pair;
+using std::vector;
+using std::transform;
+using std::initializer_list;
+
+class SubTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+// For quantized Sub, the error shouldn't exceed one quantization step.
+float GetTolerance(float min, float max)
+{
+ float kQuantizedStep = (max - min) / 255.0;
+ return kQuantizedStep;
+}
+
+TEST_F(SubTest, Uint8)
+{
+ Shape base_shape = {2, 3, 1, 2};
+ vector<float> base_data = {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
+ 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
+ vector<Shape> test_shapes = {{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ vector<float> test_data = {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ vector<vector<int32_t>> output_shapes = {{2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
+ vector<vector<float>> output_data = {
+ {-0.5f, 2.0f, 0.1f, 1.8f, -1.3f, 1.4f, 0.7f, 0.2f, 1.3f, 0.0f, -0.1f, -0.4f,
+ 0.6f, -1.4f, 1.2f, -1.6f, -0.2f, -2.0f, 1.0f, 2.5f, 1.6f, 2.3f, 0.2f, 1.9f,
+ -1.8f, -0.3f, -1.2f, -0.5f, -2.6f, -0.9f, 0.5f, -2.5f, 1.1f, -2.7f, -0.3f, -3.0f},
+ {-0.5f, 2.0f, 1.3f, 0.0f, -0.2f, -2.0f, 1.0f, 2.5f, -1.2f, -0.5f, -0.3f, -3.0f},
+ {-0.5f, 2.1f, -0.6f, 2.0f, 0.1f, 2.7f, 0.7f, 0.3f, 0.6f, 0.2f, 1.3f, 0.9f,
+ 0.6f, -1.3f, 0.5f, -1.4f, 1.2f, -0.7f, 0.7f, 2.3f, 0.2f, 1.8f, 0.3f, 1.9f,
+ -2.1f, -0.5f, -2.6f, -1.0f, -2.5f, -0.9f, 0.2f, -2.7f, -0.3f, -3.0f, -0.2f, -3.0f},
+ {-0.5f, 2.1f, 0.6f, 0.2f, 1.2f, -0.7f, 0.7f, 2.3f, -2.6f, -1.0f, -0.2f, -3.0f}};
+
+ float kQuantizedTolerance = GetTolerance(-3.f, 3.f);
+ pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-3.f, 3.f);
+ for (size_t i = 0; i < output_data.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>(
+ test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+
+ SubParams params{};
+ params.activation = Activation::NONE;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data[i], kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i]));
+ }
+
+  // Negate output_data, because subtraction is not a commutative operation.
+  auto negate = [](auto &v) {
+    transform(v.begin(), v.end(), v.begin(), [](auto &value) { return value * -1.0f; });
+  };
+  std::for_each(output_data.begin(), output_data.end(), negate);
+
+ // Re-run with exchanged inputs.
+ for (size_t i = 0; i < output_data.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DataType::U8>(
+ test_shapes[i], quant_param.first, quant_param.second, test_data, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U8>(
+ base_shape, quant_param.first, quant_param.second, base_data, _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+
+ SubParams params{};
+ params.activation = Activation::NONE;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(dequantizeTensorData(output_tensor),
+ FloatArrayNear(output_data[i], kQuantizedTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i]));
+ }
+}
+
+TEST_F(SubTest, Float)
+{
+ Shape base_shape = {2, 3, 1, 2};
+ vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ vector<vector<int32_t>> output_shapes{{2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
+ vector<vector<float>> test_outputs = {
+ {0.0f, 2.0f, 0.1f, 1.8f, 0.0f, 1.4f, 0.7f, 0.2f, 1.3f, 0.0f, 0.0f, 0.0f,
+ 0.6f, 0.0f, 1.2f, 0.0f, 0.0f, 0.0f, 1.0f, 2.5f, 1.6f, 2.3f, 0.2f, 1.9f,
+ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.5f, 0.0f, 1.1f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 2.0f, 1.3f, 0.0f, 0.0f, 0.0f, 1.0f, 2.5f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 2.1f, 0.0f, 2.0f, 0.1f, 2.7f, 0.7f, 0.3f, 0.6f, 0.2f, 1.3f, 0.9f,
+ 0.6f, 0.0f, 0.5f, 0.0f, 1.2f, 0.0f, 0.7f, 2.3f, 0.2f, 1.8f, 0.3f, 1.9f,
+ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.2f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
+ {0.0f, 2.1f, 0.6f, 0.2f, 1.2f, 0.0f, 0.7f, 2.3f, 0.0f, 0.0f, 0.0f, 0.0f}};
+
+ vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
+ 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
+ vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor =
+ makeInputTensor<DataType::FLOAT32>(base_shape, input1_data, _memory_manager.get());
+ Tensor input2_tensor =
+ makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SubParams params{};
+ params.activation = Activation::RELU;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f))
+ << "With shape number " << i;
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i]));
+ }
+}
+
+template <loco::DataType DType> void CheckInteger(luci_interpreter::IMemoryManager *memory_manager)
+{
+ using dtype = typename loco::DataTypeImpl<DType>::Type;
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ std::vector<std::vector<dtype>> test_outputs = {
+ {0, 1, 2, 3, 0, 0, 0, 0, 4, 1, 0, 0, 0, 0, 7, 0, 3, 0,
+ 0, 2, 4, 4, 0, 0, 3, 0, 10, 0, 6, 0, 3, 0, 10, 2, 6, 0},
+ {0, 1, 4, 1, 3, 0, 0, 2, 10, 0, 6, 0},
+ {0, 0, 0, 1, 2, 5, 0, 0, 0, 0, 4, 3, 0, 0, 3, 0, 7, 0,
+ 2, 4, 0, 2, 0, 0, 8, 0, 6, 0, 1, 0, 8, 2, 6, 0, 1, 0},
+ {0, 0, 0, 0, 7, 0, 2, 4, 6, 0, 1, 0}};
+ std::vector<dtype> input1_data{-1, 2, 1, 0, 4, -5, 1, 3, 7, -1, 7, 1};
+ std::vector<dtype> input2_data{4, 1, -3, -1, 1, 6};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DType>(base_shape, input1_data, memory_manager);
+ Tensor input2_tensor = makeInputTensor<DType>(test_shapes[i], input2_data, memory_manager);
+ Tensor output_tensor = makeOutputTensor(DType);
+
+ SubParams params{};
+ params.activation = Activation::RELU;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<dtype>(output_tensor), test_outputs[i])
+ << "With shape number " << i;
+ }
+}
+
+TEST_F(SubTest, SInt32)
+{
+ CheckInteger<loco::DataType::S32>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(SubTest, SInt64)
+{
+ CheckInteger<loco::DataType::S64>(_memory_manager.get());
+ SUCCEED();
+}
+
+TEST_F(SubTest, Input_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ SubParams params{};
+ params.activation = Activation::RELU;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(SubTest, Invalid_Output_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ SubParams params{};
+ params.activation = Activation::RELU;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+TEST_F(SubTest, Invalid_Input_Type_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::U64>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::U64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U64);
+
+ SubParams params{};
+ params.activation = Activation::RELU;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST_F(SubTest, Mismatching_Input_Int_Types_NEG)
+{
+ Tensor input1_tensor = makeInputTensor<DataType::S32>({1}, {1}, _memory_manager.get());
+ Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S32);
+
+ SubParams params{};
+ params.activation = Activation::NONE;
+
+ Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.cpp
new file mode 100644
index 000000000..c4fa16912
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Tanh.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/tanh.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Tanh::Tanh(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Tanh::configure()
+{
+ LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type());
+ if (input()->element_type() == DataType::U8)
+ {
+ populateLookupTable();
+ }
+ output()->resize(input()->shape());
+}
+
+void Tanh::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Tanh::evalFloat() const
+{
+ tflite::reference_ops::Tanh(getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void Tanh::evalQuantized() const
+{
+ const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+ uint8_t *output_data = getTensorData<uint8_t>(output());
+ const uint8_t *input_data = getTensorData<uint8_t>(input());
+ for (int i = 0; i < size; ++i)
+ {
+ output_data[i] = getTableValue(input_data[i]);
+ }
+}
+
+void Tanh::populateLookupTable()
+{
+ const auto input_scale = static_cast<double>(input()->scale());
+ const auto input_zero_point = static_cast<int32_t>(input()->zero_point());
+ const auto output_scale = static_cast<double>(output()->scale());
+ const auto output_zero_point = static_cast<int32_t>(output()->zero_point());
+ const float inverse_scale = 1 / output_scale;
+ int32_t maxval = std::numeric_limits<uint8_t>::max();
+ int32_t minval = std::numeric_limits<uint8_t>::min();
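+  // Note (sanity check): for val == input_zero_point the dequantized input is
+  // 0, tanh(0) == 0, and the table entry becomes output_zero_point, so the
+  // real value zero stays exactly representable after quantization.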
+ for (int32_t val = minval; val <= maxval; ++val)
+ {
+ const float dequantized = input_scale * (val - input_zero_point);
+ const float transformed = std::tanh(dequantized);
+ const float rescaled = std::round(transformed * inverse_scale);
+ const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
+ setTableValue(static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)),
+ static_cast<uint8_t>(val));
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.h b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.h
new file mode 100644
index 000000000..8017c9638
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_TANH_H
+#define LUCI_INTERPRETER_KERNELS_TANH_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Tanh : public Kernel
+{
+public:
+ Tanh(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void populateLookupTable();
+  void setTableValue(uint8_t value, uint8_t idx) { _table[idx] = value; }
+  uint8_t getTableValue(uint8_t idx) const { return _table[idx]; }
+
+private:
+ uint8_t _table[256]{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_TANH_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.test.cpp
new file mode 100644
index 000000000..bfae479a9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Tanh.test.cpp
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Tanh.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+class TanhTest : public ::testing::Test
+{
+protected:
+ void SetUp() override { _memory_manager = std::make_unique<TestMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+};
+
+TEST_F(TanhTest, Float)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>(input_shape, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tanh kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 0, -0.9999877, 0.9640275, 0.999329, //
+ 0.99505475, -0.9640275, 1, 0.7615941, //
+ };
+ EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST_F(TanhTest, Uint8)
+{
+ float kMin = -1;
+ float kMax = 127.f / 128.f;
+ float kTanhTolerance = 2 * (1. / 256);
+ std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(8 * kMin, 8 * kMax);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(kMin, kMax);
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::U8>({2, 6, 4, 1}, input_quant_param.first, input_quant_param.second,
+ input_data, _memory_manager.get());
+ Tensor output_tensor =
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+
+ Tanh kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ _memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ 0.0, -0.999987, 0.964027, 0.999329, //
+ -0.999329, -0.96402, 0.99999, 0.76159, //
+ };
+ std::vector<int32_t> ref_output_shape{2, 6, 4, 1};
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data, kTanhTolerance));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST_F(TanhTest, InputTypeInvalid_NEG)
+{
+ std::vector<int64_t> input_data{
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::S64>({2, 6, 4, 1}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Tanh kernel(&input_tensor, &output_tensor);
+ _memory_manager->allocate_memory(output_tensor);
+ EXPECT_ANY_THROW(kernel.execute());
+}
+
+TEST_F(TanhTest, InputOutputMismatch_NEG)
+{
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ 0, -6, 2, 4, //
+ -4, -2, 8, 1, //
+ };
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({2, 6, 4, 1}, input_data, _memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8);
+
+ Tanh kernel(&input_tensor, &output_tensor);
+ EXPECT_ANY_THROW(kernel.configure());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.cpp
new file mode 100644
index 000000000..4d983adda
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.cpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace testing
+{
+
+using ::testing::FloatNear;
+using ::testing::Matcher;
+
+Tensor makeOutputTensor(DataType element_type) { return Tensor(element_type, {}, {}, ""); }
+
+Tensor makeOutputTensor(DataType element_type, float scale, int32_t zero_point)
+{
+ return Tensor(element_type, {}, {{scale}, {zero_point}}, "");
+}
+
+std::vector<float> dequantizeTensorData(const Tensor &tensor)
+{
+ if (tensor.element_type() == DataType::U8)
+ {
+ std::vector<uint8_t> data = extractTensorData<uint8_t>(tensor);
+ return dequantize(data.data(), data.size(), tensor.scale(), tensor.zero_point());
+ }
+ if (tensor.element_type() == DataType::S8)
+ {
+ std::vector<int8_t> data = extractTensorData<int8_t>(tensor);
+ return dequantize(data.data(), data.size(), tensor.scale(), tensor.zero_point());
+ }
+ else if (tensor.element_type() == DataType::S16)
+ {
+ // S16 quantization is symmetric, so zero point should be zero.
+ for (auto zp : tensor.zero_points())
+ {
+ (void)zp;
+ assert(zp == 0);
+ }
+
+ std::vector<int16_t> data = extractTensorData<int16_t>(tensor);
+ if (tensor.scales().size() == 1)
+ {
+ return dequantize(data.data(), data.size(), tensor.scale(), 0);
+ }
+
+  // quantized_dimension breaks the shape into two parts:
+  // inner dimensions that contain contiguous data with one quantization scale
+  // outer dimensions that contain the remaining dimensions
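+  // e.g. shape {2, 3, 4} with quantized_dimension == 1 gives
+  // outer_dims_size == 2, quant_dim_size == 3, inner_dims_size == 4.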
+ const Shape shape = tensor.shape();
+ const int32_t quantized_dimension = tensor.quantized_dimension();
+ assert(quantized_dimension < shape.num_dims());
+ size_t outer_dims_size = 1;
+ int32_t quant_dim_size = shape.dim(quantized_dimension);
+ size_t inner_dims_size = 1;
+ assert(quant_dim_size == tensor.scales().size());
+
+ for (int i = 0; i < quantized_dimension; ++i)
+ outer_dims_size *= shape.dim(i);
+ for (int i = quantized_dimension + 1; i < shape.num_dims(); ++i)
+ inner_dims_size *= shape.dim(i);
+
+ assert(shape.num_elements() == outer_dims_size * quant_dim_size * inner_dims_size);
+
+ std::vector<float> dequantized_data;
+ dequantized_data.reserve(shape.num_elements());
+ for (size_t outer_it = 0; outer_it < outer_dims_size; ++outer_it)
+ for (int32_t channel = 0; channel < quant_dim_size; ++channel)
+ {
+ float scale = tensor.scales()[channel];
+ size_t offset = inner_dims_size * (quant_dim_size * outer_it + channel);
+ std::vector<float> part_dequantized_data =
+ dequantize(data.data() + offset, inner_dims_size, scale, 0);
+ dequantized_data.insert(dequantized_data.end(), part_dequantized_data.begin(),
+ part_dequantized_data.end());
+ }
+ return dequantized_data;
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+Matcher<std::vector<float>> FloatArrayNear(const std::vector<float> &values, float max_abs_error)
+{
+ std::vector<Matcher<float>> matchers;
+ matchers.reserve(values.size());
+ for (const float v : values)
+ {
+ matchers.emplace_back(FloatNear(v, max_abs_error));
+ }
+ return ElementsAreArray(matchers);
+}
+
+std::vector<int32_t> extractTensorShape(const Tensor &tensor)
+{
+ std::vector<int32_t> result;
+ int dims = tensor.shape().num_dims();
+ for (int i = 0; i < dims; i++)
+ {
+ result.push_back(tensor.shape().dim(i));
+ }
+ return result;
+}
+
+} // namespace testing
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.h b/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.h
new file mode 100644
index 000000000..1f5a0c308
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/TestUtils.h
@@ -0,0 +1,296 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_TESTUTILS_H
+#define LUCI_INTERPRETER_KERNELS_TESTUTILS_H
+
+#include "luci_interpreter/core/Tensor.h"
+#include "luci_interpreter/MemoryManager.h"
+
+#include <type_traits>
+
+#include <gtest/gtest.h>
+#include <gmock/gmock.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace testing
+{
+
+template <typename T>
+std::vector<T> quantize(const float *data, size_t num_elements, float scale, int32_t zero_point);
+
+template <DataType DT>
+Tensor makeInputTensor(const Shape &shape, const std::vector<typename DataTypeImpl<DT>::Type> &data,
+ IMemoryManager *memory_manager)
+{
+ Tensor tensor(DT, shape, {}, "");
+ memory_manager->allocate_memory(tensor);
+ tensor.writeData(data.data(), data.size() * sizeof(typename DataTypeImpl<DT>::Type));
+ return tensor;
+}
+
+/**
+ * @brief Create layer-wise quantized tensor
+ * @tparam DT base integer data type, for example DataType::U8, DataType::S16, DataType::S64
+ * @param shape desired tensor shape
+ * @param scale scale of quantized number
+ * @param zero_point zero point of quantized number, should be 0 for signed datatypes
+ * @param data floating point data for quantization
+ * @param memory_manager memory manager for allocating memory to tensor
+ * @return created tensor
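+ *
+ * Illustrative usage (a sketch, assuming a TestMemoryManager as in the tests):
+ * @code
+ *   Tensor t = makeInputTensor<DataType::U8>({1, 3}, 1.f / 128.f, 128,
+ *                                            {-1.f, 0.f, 1.f}, memory_manager.get());
+ * @endcode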
+ */
+template <DataType DT>
+Tensor makeInputTensor(const Shape &shape, float scale, int32_t zero_point,
+ const std::vector<float> &data, IMemoryManager *memory_manager)
+{
+ using NativeT = typename DataTypeImpl<DT>::Type;
+ Tensor tensor(DT, shape, {{scale}, {zero_point}}, "");
+ std::vector<NativeT> quantized_data =
+ quantize<NativeT>(data.data(), data.size(), scale, zero_point);
+ memory_manager->allocate_memory(tensor);
+ tensor.writeData(quantized_data.data(), quantized_data.size() * sizeof(NativeT));
+ return tensor;
+}
+
+/**
+ * @brief Create channel-wise quantized tensor
+ * @tparam DT base integer data type, for example DataType::U8, DataType::S16, DataType::S64
+ * @param shape desired tensor shape
+ * @param scales scales of quantized number
+ * @param zero_points zero points of quantized number, should be 0 for signed datatypes
+ * @param quantized_dimension dimension to apply quantization along, usually channels/output channels
+ * @param data floating point data for quantization
+ * @param memory_manager memory manager for allocating memory to tensor
+ * @return created tensor
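+ *
+ * Illustrative usage (a sketch): two channels along dimension 0, each with its
+ * own scale and zero point:
+ * @code
+ *   Tensor t = makeInputTensor<DataType::U8>(
+ *     {2, 2}, {0.5f, 0.25f}, {0, 0}, 0, {1.f, 2.f, 3.f, 4.f}, memory_manager.get());
+ * @endcode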
+ */
+template <DataType DT>
+Tensor makeInputTensor(const Shape &shape, const std::vector<float> &scales,
+ const std::vector<int32_t> &zero_points, int quantized_dimension,
+ const std::vector<float> &data, IMemoryManager *memory_manager)
+{
+ using NativeT = typename DataTypeImpl<DT>::Type;
+ assert(quantized_dimension < shape.num_dims());
+ Tensor tensor(DT, shape, {scales, zero_points, quantized_dimension}, "");
+
+  // quantized_dimension breaks the shape into two parts:
+  // inner dimensions that contain contiguous data with one quantization scale
+  // outer dimensions that contain the remaining dimensions
+ size_t outer_dims_size = 1;
+ int32_t quant_dim_size = shape.dim(quantized_dimension);
+ size_t inner_dims_size = 1;
+ assert(quant_dim_size == scales.size());
+ assert(quant_dim_size == zero_points.size());
+
+ for (int i = 0; i < quantized_dimension; ++i)
+ outer_dims_size *= shape.dim(i);
+ for (int i = quantized_dimension + 1; i < shape.num_dims(); ++i)
+ inner_dims_size *= shape.dim(i);
+
+ assert(shape.num_elements() == outer_dims_size * quant_dim_size * inner_dims_size);
+
+ std::vector<NativeT> quantized_data;
+ quantized_data.reserve(shape.num_elements());
+ for (size_t outer_it = 0; outer_it < outer_dims_size; ++outer_it)
+ for (int32_t channel = 0; channel < quant_dim_size; ++channel)
+ {
+ int32_t zero_point = zero_points[channel];
+ float scale = scales[channel];
+ size_t offset = inner_dims_size * (quant_dim_size * outer_it + channel);
+ std::vector<NativeT> part_quantized_data =
+ quantize<NativeT>(data.data() + offset, inner_dims_size, scale, zero_point);
+ quantized_data.insert(quantized_data.end(), part_quantized_data.begin(),
+ part_quantized_data.end());
+ }
+ assert(quantized_data.size() == shape.num_elements());
+ memory_manager->allocate_memory(tensor);
+ tensor.writeData(quantized_data.data(), quantized_data.size() * sizeof(NativeT));
+ return tensor;
+}
+
+Tensor makeOutputTensor(DataType element_type);
+Tensor makeOutputTensor(DataType element_type, float scale, int32_t zero_point);
+
+std::vector<int32_t> extractTensorShape(const Tensor &tensor);
+
+// Returns the corresponding DataType given the type T.
+template <typename T> constexpr DataType getElementType()
+{
+ if (std::is_same<T, float>::value)
+ return DataType::FLOAT32;
+ if (std::is_same<T, double>::value)
+ return DataType::FLOAT64;
+ if (std::is_same<T, uint8_t>::value)
+ return DataType::U8;
+ if (std::is_same<T, uint16_t>::value)
+ return DataType::U16;
+ if (std::is_same<T, uint32_t>::value)
+ return DataType::U32;
+ if (std::is_same<T, uint64_t>::value)
+ return DataType::U64;
+ if (std::is_same<T, int8_t>::value)
+ return DataType::S8;
+ if (std::is_same<T, int16_t>::value)
+ return DataType::S16;
+ if (std::is_same<T, int32_t>::value)
+ return DataType::S32;
+ if (std::is_same<T, int64_t>::value)
+ return DataType::S64;
+ if (std::is_same<T, bool>::value)
+ return DataType::BOOL;
+ return DataType::Unknown;
+}
+
+template <typename T> std::vector<T> extractTensorData(const Tensor &tensor)
+{
+ const auto *data_ptr = tensor.data<T>();
+ return std::vector<T>(data_ptr, data_ptr + tensor.shape().num_elements());
+}
+
+std::vector<float> dequantizeTensorData(const Tensor &tensor);
+
+// Array version of `::testing::FloatNear` matcher.
+::testing::Matcher<std::vector<float>> FloatArrayNear(const std::vector<float> &values,
+ float max_abs_error = 1.0e-5f);
+
+template <typename T>
+std::vector<T> quantize(const float *data, size_t num_elements, float scale, int32_t zero_point)
+{
+ static_assert(std::is_integral<T>::value, "Integral type expected.");
+
+ float q_min{}, q_max{};
+ if (std::is_signed<T>::value)
+ {
+ q_min = -std::numeric_limits<T>::max();
+ q_max = std::numeric_limits<T>::max();
+ }
+ else
+ {
+ q_min = 0;
+ q_max = std::numeric_limits<T>::max();
+ }
+
+ std::vector<T> q;
+ for (size_t i = 0; i < num_elements; ++i)
+ {
+ const auto &f = data[i];
+ q.push_back(static_cast<T>(
+ std::max<float>(q_min, std::min<float>(q_max, std::round(zero_point + (f / scale))))));
+ }
+ return q;
+}
+
+template <typename T>
+std::vector<float> dequantize(const T *data, size_t num_elements, float scale, int32_t zero_point)
+{
+ static_assert(std::is_integral<T>::value, "Integral type expected.");
+ std::vector<float> f;
+ for (size_t i = 0; i < num_elements; ++i)
+ {
+ const T &q = data[i];
+ f.push_back(scale * (q - zero_point));
+ }
+ return f;
+}
+
+// NOTE Returns scale and zero point for _asymmetric_ range (both signed and unsigned).
+template <typename T> std::pair<float, int32_t> quantizationParams(float f_min, float f_max)
+{
+ static_assert(std::is_integral<T>::value, "Integral type expected.");
+ int32_t zero_point = 0;
+ float scale = 0;
+ const T qmin = std::numeric_limits<T>::lowest();
+ const T qmax = std::numeric_limits<T>::max();
+ const float qmin_double = qmin;
+ const float qmax_double = qmax;
+ // 0 should always be a representable value. Let's assume that the initial
+ // min,max range contains 0.
+ assert(f_max >= 0);
+ assert(f_min <= 0);
+ if (f_min == f_max)
+ {
+ // Special case where the min,max range is a point. Should be {0}.
+ assert(f_max == 0);
+ assert(f_min == 0);
+ return {scale, zero_point};
+ }
+
+ // General case.
+ //
+ // First determine the scale.
+ scale = (f_max - f_min) / (qmax_double - qmin_double);
+
+ // Zero-point computation.
+ // First the initial floating-point computation. The zero-point can be
+ // determined from solving an affine equation for any known pair
+ // (real value, corresponding quantized value).
+ // We know two such pairs: (rmin, qmin) and (rmax, qmax).
+ // The arithmetic error on the zero point computed from either pair
+ // will be roughly machine_epsilon * (sum of absolute values of terms)
+ // so we want to use the variant that adds the smaller terms.
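+  //
+  // Worked example: quantizationParams<uint8_t>(-3.f, 3.f) gives
+  // scale = 6 / 255 ~= 0.0235; both pairs then yield 127.5, which is nudged
+  // below to the integer zero point 128.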
+ const float zero_point_from_min = qmin_double - f_min / scale;
+ const float zero_point_from_max = qmax_double - f_max / scale;
+
+ const float zero_point_from_min_error = std::abs(qmin_double) + std::abs(f_min / scale);
+
+ const float zero_point_from_max_error = std::abs(qmax_double) + std::abs(f_max / scale);
+
+ const float zero_point_double = zero_point_from_min_error < zero_point_from_max_error
+ ? zero_point_from_min
+ : zero_point_from_max;
+
+ // Now we need to nudge the zero point to be an integer
+ // (our zero points are integer, and this is motivated by the requirement
+ // to be able to represent the real value "0" exactly as a quantized value,
+ // which is required in multiple places, for example in Im2col with SAME
+ // padding).
+
+ T nudged_zero_point = 0;
+ if (zero_point_double < qmin_double)
+ {
+ nudged_zero_point = qmin;
+ }
+ else if (zero_point_double > qmax_double)
+ {
+ nudged_zero_point = qmax;
+ }
+ else
+ {
+ nudged_zero_point = static_cast<T>(std::round(zero_point_double));
+ }
+
+  // The zero point should always be in the range of quantized values,
+  // [qmin, qmax].
+ assert(qmax >= nudged_zero_point);
+ assert(qmin <= nudged_zero_point);
+ zero_point = nudged_zero_point;
+ // finally, return the values
+ return {scale, zero_point};
+}
+
+inline float getTolerance(float min, float max, int quantize_steps)
+{
+ return ((max - min) / quantize_steps);
+}
+
+} // namespace testing
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_TESTUTILS_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.cpp
new file mode 100644
index 000000000..802d87295
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Transpose.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/transpose.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Transpose::Transpose(const Tensor *input, const Tensor *perm, Tensor *output)
+ : Kernel({input, perm}, {output})
+{
+}
+
+void Transpose::configure()
+{
+ // Transpose op only supports 1D-4D input arrays.
+ int dims = input()->shape().num_dims();
+ const int32_t *perm_data = getTensorData<int32_t>(perm());
+
+ assert(input()->shape().num_dims() <= 4);
+ assert(input()->element_type() == output()->element_type());
+
+ assert(perm()->shape().num_dims() == 1);
+ assert(perm()->shape().dim(0) == dims);
+
+ Shape output_shape(dims);
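+  // Example: input shape {2, 3, 4} with perm {2, 0, 1} produces output shape
+  // {4, 2, 3}, since output dim i takes the size of input dim perm_data[i].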
+ for (int i = 0; i < dims; i++)
+ {
+ assert(perm_data[i] < dims && perm_data[i] >= 0);
+ output_shape.dim(i) = input()->shape().dim(perm_data[i]);
+ }
+
+ output()->resize(output_shape);
+}
+
+void Transpose::execute() const
+{
+ tflite::TransposeParams params{};
+ const int32_t *perm_data = getTensorData<int32_t>(perm());
+ const int32_t size = perm()->shape().dim(0);
+ params.perm_count = size;
+ for (int i = 0; i < size; i++)
+ params.perm[i] = perm_data[i];
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::reference_ops::Transpose(params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ tflite::reference_ops::Transpose(params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.h b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.h
new file mode 100644
index 000000000..d6f89c352
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_TRANSPOSE_H
+#define LUCI_INTERPRETER_KERNELS_TRANSPOSE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Transpose : public Kernel
+{
+public:
+ Transpose(const Tensor *input, const Tensor *perm, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *perm() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_TRANSPOSE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.test.cpp
new file mode 100644
index 000000000..43be8f8b9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Transpose.test.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Transpose.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> perm_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data,
+ std::initializer_list<int32_t> perm_data, std::initializer_list<T> output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ Tensor perm_tensor = makeInputTensor<DataType::S32>(perm_shape, perm_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ Transpose kernel(&input_tensor, &perm_tensor, &output_tensor);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+}
+
+template <typename T> class TransposeTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(TransposeTest, DataTypes);
+
+TYPED_TEST(TransposeTest, Small3D)
+{
+ Check<TypeParam>(/*input_shape=*/{2, 3, 4}, /*perm_shape=*/{3}, /*output_shape=*/{4, 2, 3},
+ /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
+ /*perm_data=*/{2, 0, 1},
+ /*output_data=*/{0, 4, 8, 12, 16, 20, 1, 5, 9, 13, 17, 21,
+ 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23});
+}
+
+TYPED_TEST(TransposeTest, Large4D)
+{
+ Check<TypeParam>(
+ /*input_shape=*/{2, 3, 4, 5}, /*perm_shape=*/{4}, /*output_shape=*/{4, 2, 3, 5},
+ /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119},
+ /*perm_data=*/{2, 0, 1, 3},
+ /*output_data=*/{0, 1, 2, 3, 4, 20, 21, 22, 23, 24, 40, 41, 42, 43, 44,
+ 60, 61, 62, 63, 64, 80, 81, 82, 83, 84, 100, 101, 102, 103, 104,
+ 5, 6, 7, 8, 9, 25, 26, 27, 28, 29, 45, 46, 47, 48, 49,
+ 65, 66, 67, 68, 69, 85, 86, 87, 88, 89, 105, 106, 107, 108, 109,
+ 10, 11, 12, 13, 14, 30, 31, 32, 33, 34, 50, 51, 52, 53, 54,
+ 70, 71, 72, 73, 74, 90, 91, 92, 93, 94, 110, 111, 112, 113, 114,
+ 15, 16, 17, 18, 19, 35, 36, 37, 38, 39, 55, 56, 57, 58, 59,
+ 75, 76, 77, 78, 79, 95, 96, 97, 98, 99, 115, 116, 117, 118, 119});
+}
+
+TYPED_TEST(TransposeTest, Large2D)
+{
+ Check<TypeParam>(
+ /*input_shape=*/{10, 12}, /*perm_shape=*/{2}, /*output_shape=*/{12, 10},
+ /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119},
+ /*perm_data=*/{1, 0},
+ /*output_data=*/{0, 12, 24, 36, 48, 60, 72, 84, 96, 108, 1, 13, 25, 37, 49,
+ 61, 73, 85, 97, 109, 2, 14, 26, 38, 50, 62, 74, 86, 98, 110,
+ 3, 15, 27, 39, 51, 63, 75, 87, 99, 111, 4, 16, 28, 40, 52,
+ 64, 76, 88, 100, 112, 5, 17, 29, 41, 53, 65, 77, 89, 101, 113,
+ 6, 18, 30, 42, 54, 66, 78, 90, 102, 114, 7, 19, 31, 43, 55,
+ 67, 79, 91, 103, 115, 8, 20, 32, 44, 56, 68, 80, 92, 104, 116,
+ 9, 21, 33, 45, 57, 69, 81, 93, 105, 117, 10, 22, 34, 46, 58,
+ 70, 82, 94, 106, 118, 11, 23, 35, 47, 59, 71, 83, 95, 107, 119});
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.cpp
new file mode 100644
index 000000000..1b5f9d941
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.cpp
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TransposeConv.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/transpose_conv.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
+ const Tensor *bias, Tensor *output, Tensor *scratch_tensor,
+ const TransposeConvParams &params)
+ : KernelWithParams<TransposeConvParams>({output_shape, filter, input, bias},
+ {output, scratch_tensor}, params)
+{
+}
+
+TransposeConv::~TransposeConv()
+{
+  // Define the destructor here, to delete the vector of quantized multipliers properly
+}
+
+void TransposeConv::configure()
+{
+ assert(output_shape()->shape().num_dims() == 1);
+ assert(input()->shape().num_dims() == 4);
+ assert(filter()->shape().num_dims() == 4);
+ assert(input()->element_type() == DataType::FLOAT32 || input()->element_type() == DataType::U8 ||
+ input()->element_type() == DataType::S16);
+ assert(input()->element_type() == output()->element_type());
+ assert(input()->shape().dim(3) == filter()->shape().dim(3));
+
+ const int num_dims = output_shape()->shape().dim(0);
+ Shape out_shape(num_dims);
+ const auto *shape_data = getTensorData<int32_t>(output_shape());
+ for (int i = 0; i < num_dims; i++)
+ out_shape.dim(i) = shape_data[i];
+ output()->resize(out_shape);
+
+ const int32_t filter_height = filter()->shape().dim(1);
+ const int32_t filter_width = filter()->shape().dim(2);
+ const int32_t output_height = out_shape.dim(1);
+ const int32_t output_width = out_shape.dim(2);
+
+ const int32_t unused_output_height =
+ computeOutputSize(params().padding, output_height, filter_height, params().stride_height, 1);
+ const int32_t unused_output_width =
+ computeOutputSize(params().padding, output_width, filter_width, params().stride_width, 1);
+
+ _padding_height =
+ computePadding(params().stride_height, 1, output_height, filter_height, unused_output_height);
+ _padding_width =
+ computePadding(params().stride_width, 1, output_width, filter_width, unused_output_width);
+
+ if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16)
+ {
+ auto scratch_tensor = getOutputTensors()[1];
+ scratch_tensor->resize(output()->shape());
+ const std::vector<double> real_multipliers =
+ getQuantizedConvolutionMultiplers(input()->scale(), filter()->scales(), output()->scale());
+
+ _quant_multipliers = quantizeMultipliers(real_multipliers);
+ }
+ else
+ {
+ auto scratch_tensor = getOutputTensors()[1];
+ scratch_tensor->set_allocatable(false);
+ }
+}
+
+void TransposeConv::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ if (filter()->scales().size() == 1)
+ {
+ evalQuantized();
+ }
+ else if (filter()->scales().size() > 1)
+ {
+ LUCI_INTERPRETER_CHECK(filter()->shape().num_dims() == 4);
+ LUCI_INTERPRETER_CHECK(filter()->scales().size() ==
+ static_cast<size_t>(filter()->shape().dim(0)));
+ evalQuantizedPerChannel();
+ }
+ break;
+ case DataType::S16:
+ evalQuantizedS16();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void TransposeConv::evalFloat() const
+{
+ tflite::ConvParams op_params{};
+ op_params.padding_type = tflite::PaddingType::kSame;
+ op_params.padding_values.height = _padding_height;
+ op_params.padding_values.width = _padding_width;
+ op_params.stride_height = params().stride_height;
+ op_params.stride_width = params().stride_width;
+ tflite::reference_ops::TransposeConv(op_params, //
+ getTensorShape(input()), getTensorData<float>(input()), //
+ getTensorShape(filter()), getTensorData<float>(filter()), //
+ getTensorShape(bias()), getTensorData<float>(bias()), //
+ getTensorShape(output()), getTensorData<float>(output()), //
+ tflite::RuntimeShape(), nullptr);
+}
+
+void TransposeConv::evalQuantized() const
+{
+ tflite::ConvParams op_params{};
+ op_params.padding_type = tflite::PaddingType::kSame;
+ op_params.padding_values.height = _padding_height;
+ op_params.padding_values.width = _padding_width;
+ op_params.stride_height = params().stride_height;
+ op_params.stride_width = params().stride_width;
+ // The kernel expects input and filter zero points to be negated.
+ op_params.input_offset = -input()->zero_point(); // Note the '-'.
+ op_params.weights_offset = -filter()->zero_point(); // Note the '-'.
+ op_params.output_offset = output()->zero_point();
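+  // configure() computed a single per-tensor (multiplier, shift) pair for this
+  // path; the reference kernel uses it to requantize the int32 accumulator.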
+ op_params.output_multiplier = _quant_multipliers[0].multiplier;
+ op_params.output_shift = _quant_multipliers[0].shift;
+ op_params.quantized_activation_min = std::numeric_limits<uint8_t>::min();
+ op_params.quantized_activation_max = std::numeric_limits<uint8_t>::max();
+
+ auto scratch_tensor = getOutputTensors()[1];
+
+ tflite::reference_ops::TransposeConv(op_params, //
+                                       getTensorShape(input()), getTensorData<uint8_t>(input()), //
+                                       getTensorShape(filter()), getTensorData<uint8_t>(filter()), //
+                                       getTensorShape(bias()), getTensorData<int32_t>(bias()), //
+                                       getTensorShape(output()), getTensorData<uint8_t>(output()), //
+ tflite::RuntimeShape(), nullptr, //
+ getTensorData<int32_t>(scratch_tensor));
+}
+
+void TransposeConv::evalQuantizedPerChannel() const
+{
+ const auto *input_data = getTensorData<uint8_t>(input());
+ const auto *filter_data = getTensorData<uint8_t>(filter());
+ const auto *bias_data = getTensorData<int32_t>(bias());
+ auto *output_data = getTensorData<uint8_t>(output());
+
+ auto scratch_tensor = getOutputTensors()[1];
+ auto *scratch_data = getTensorData<int32_t>(scratch_tensor);
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t input_depth = input_shape.dim(3);
+ const int32_t output_depth = filter_shape.dim(0);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t output_height = output_shape.dim(1);
+ const int32_t output_width = output_shape.dim(2);
+
+ const int32_t stride_height = _params.stride_height;
+ const int32_t stride_width = _params.stride_width;
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
+
+ std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int32_t));
+
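+  // Two-pass evaluation: first scatter-accumulate the zero-point-corrected
+  // input * filter products into the int32 scratch buffer, then requantize the
+  // accumulated values per output channel and clamp to the activation range.
+  // BroadcastableWrapper lets a single per-tensor multiplier be indexed as if
+  // it were replicated across all output channels.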
+ BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
+ for (int32_t batch = 0; batch < batches; ++batch)
+ {
+ for (int32_t in_y = 0; in_y < input_height; ++in_y)
+ {
+ for (int32_t in_x = 0; in_x < input_width; ++in_x)
+ {
+ for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+ {
+ const int32_t out_y_origin = in_y * stride_height - _padding_height;
+ const int32_t out_x_origin = in_x * stride_width - _padding_width;
+ for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int32_t out_x = out_x_origin + filter_x;
+ const int32_t out_y = out_y_origin + filter_y;
+ if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ const uint8_t input_val =
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ const uint8_t filter_val =
+ filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+ scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
+ static_cast<int32_t>(input_val - input()->zero_point()) *
+ static_cast<int32_t>(filter_val - filter()->zero_points()[out_c]);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ for (int32_t out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int32_t out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ int32_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)];
+ if (bias_data)
+ {
+ acc += bias_data[out_c];
+ }
+
+ int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+ acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
+
+ scaled_acc += output()->zero_point();
+ scaled_acc = std::max(scaled_acc, activation_min);
+ scaled_acc = std::min(scaled_acc, activation_max);
+
+ output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
+ }
+ }
+ }
+ }
+}
+
+void TransposeConv::evalQuantizedS16() const
+{
+ const auto *input_data = getTensorData<int16_t>(input());
+ const auto *filter_data = getTensorData<int16_t>(filter());
+ const auto *bias_data = getTensorData<int64_t>(bias());
+ auto *output_data = getTensorData<int16_t>(output());
+
+ auto scratch_tensor = getOutputTensors()[1];
+ auto *scratch_data = getTensorData<int64_t>(scratch_tensor);
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ const Shape &output_shape = output()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t input_depth = input_shape.dim(3);
+ const int32_t output_depth = filter_shape.dim(0);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t output_height = output_shape.dim(1);
+ const int32_t output_width = output_shape.dim(2);
+
+ const int32_t stride_height = _params.stride_height;
+ const int32_t stride_width = _params.stride_width;
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(Activation::NONE, output(), &activation_min, &activation_max);
+
+ std::memset(scratch_data, 0, scratch_tensor->shape().num_elements() * sizeof(int64_t));
+
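+  // Same two-pass scheme as evalQuantizedPerChannel, but with 64-bit
+  // accumulation (hence the int64 scratch) and symmetric int16 inputs, so no
+  // zero-point correction is needed.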
+ BroadcastableWrapper<ChannelQuantMultipliers> output_multipliers(_quant_multipliers);
+ for (int32_t batch = 0; batch < batches; ++batch)
+ {
+ for (int32_t in_y = 0; in_y < input_height; ++in_y)
+ {
+ for (int32_t in_x = 0; in_x < input_width; ++in_x)
+ {
+ for (int32_t in_c = 0; in_c < input_depth; ++in_c)
+ {
+ const int32_t out_y_origin = in_y * stride_height - _padding_height;
+ const int32_t out_x_origin = in_x * stride_width - _padding_width;
+ for (int32_t filter_y = 0; filter_y < filter_height; ++filter_y)
+ {
+ for (int32_t filter_x = 0; filter_x < filter_width; ++filter_x)
+ {
+ const int32_t out_x = out_x_origin + filter_x;
+ const int32_t out_y = out_y_origin + filter_y;
+ if ((out_y >= 0 && out_y < output_height) && (out_x >= 0 && out_x < output_width))
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ const int16_t input_val =
+ input_data[calcOffset(input_shape, batch, in_y, in_x, in_c)];
+ const int16_t filter_val =
+ filter_data[calcOffset(filter_shape, out_c, filter_y, filter_x, in_c)];
+ scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] +=
+ static_cast<int64_t>(input_val) * static_cast<int64_t>(filter_val);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ for (int32_t out_y = 0; out_y < output_height; ++out_y)
+ {
+ for (int32_t out_x = 0; out_x < output_width; ++out_x)
+ {
+ for (int32_t out_c = 0; out_c < output_depth; ++out_c)
+ {
+ int64_t acc = scratch_data[calcOffset(output_shape, batch, out_y, out_x, out_c)];
+ if (bias_data)
+ {
+ acc += bias_data[out_c];
+ }
+ int32_t scaled_acc = tflite::MultiplyByQuantizedMultiplier(
+ acc, output_multipliers[out_c].multiplier, output_multipliers[out_c].shift);
+
+ scaled_acc = std::max(scaled_acc, activation_min);
+ scaled_acc = std::min(scaled_acc, activation_max);
+
+ output_data[calcOffset(output_shape, batch, out_y, out_x, out_c)] = scaled_acc;
+ }
+ }
+ }
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.h b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.h
new file mode 100644
index 000000000..cea0cf3c7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_TRANSPOSECONV_H
+#define LUCI_INTERPRETER_KERNELS_TRANSPOSECONV_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class ChannelQuantMultipliers;
+
+class TransposeConv : public KernelWithParams<TransposeConvParams>
+{
+public:
+ TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
+ const Tensor *bias, Tensor *output, Tensor *scratch_tensor,
+ const TransposeConvParams &params);
+
+ ~TransposeConv();
+
+ const Tensor *output_shape() const { return _inputs[0]; }
+ const Tensor *filter() const { return _inputs[1]; }
+ const Tensor *input() const { return _inputs[2]; }
+ const Tensor *bias() const { return _inputs[3]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void evalQuantizedPerChannel() const;
+ void evalQuantizedS16() const;
+
+private:
+ int32_t _padding_height{};
+ int32_t _padding_width{};
+ // The scaling factor from input to output (aka the 'real multiplier') can
+ // be represented as a fixed point multiplier plus a left shift.
+ std::vector<ChannelQuantMultipliers> _quant_multipliers;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_TRANSPOSECONV_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.test.cpp
new file mode 100644
index 000000000..4856e1b87
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/TransposeConv.test.cpp
@@ -0,0 +1,353 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TransposeConv.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T, typename B>
+void Check(std::initializer_list<int32_t> output_shape_shape,
+ std::initializer_list<int32_t> weight_shape, std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<int32_t> output_shape_data, std::initializer_list<T> weight_data,
+ std::initializer_list<T> input_data, std::initializer_list<B> bias_data,
+ std::initializer_list<T> output_data, luci::Padding padding, int32_t stride_height,
+ int32_t stride_width)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ constexpr DataType element_type = getElementType<T>();
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>(output_shape_shape, output_shape_data, memory_manager.get());
+ Tensor weight_tensor =
+ makeInputTensor<element_type>(weight_shape, weight_data, memory_manager.get());
+ Tensor input_data_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+
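+  // The S16 path accumulates into 64-bit scratch (and takes int64 bias), so it
+  // needs an S64 scratch tensor; the U8 paths accumulate in 32 bits.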
+ DataType scratch_data_type = element_type == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ TransposeConvParams params{};
+ params.padding = padding;
+ params.stride_height = stride_height;
+ params.stride_width = stride_width;
+
+ if (bias_data.size() != 0)
+ {
+ Tensor bias_tensor =
+ makeInputTensor<getElementType<B>()>(bias_shape, bias_data, memory_manager.get());
+ TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, &bias_tensor,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+ }
+ else
+ {
+ TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, nullptr,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+ }
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+}
+
+TEST(TransposeConvTest, FloatSimple)
+{
+ Check<float, float>(
+ /*output_shape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 1}, /*input_shape=*/{1, 4, 4, 1},
+ /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 4, 4, 1},
+ /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ /*bias_data=*/{},
+ /*output_data=*/{29, 62, 83, 75, 99, 192, 237, 198, 207, 372, 417, 330, 263, 446, 485, 365},
+ /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1);
+
+ SUCCEED();
+}
+
+TEST(TransposeConvTest, FloatTwoFiltersTest)
+{
+ Check<float, float>(
+ /*output_shape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 2}, /*input_shape=*/{1, 4, 4, 2},
+ /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 4, 4, 1},
+ /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32},
+ /*bias_data=*/{},
+ /*output_data=*/
+ {184, 412, 568, 528, 678, 1347, 1689, 1434, 1494, 2715, 3057, 2442, 1968, 3352, 3652, 2760},
+ /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1);
+
+ SUCCEED();
+}
+
+TEST(TransposeConvTest, SimpleBiasTest)
+{
+ Check<float, float>(
+ /*output_shape_shape=*/{4}, /*weight_shape=*/{2, 3, 3, 1},
+ /*input_shape=*/{1, 2, 2, 1},
+    /*bias_shape=*/{2}, /*output_shape=*/{1, 5, 5, 2}, /*output_shape_data=*/{1, 5, 5, 2},
+ /*weight_data=*/{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18},
+ /*input_data=*/{1, 2, 3, 4},
+ /*bias_data=*/{3, 4},
+ /*output_data=*/{4, 6, 6, 8, 10, 14, 9, 12, 13, 16, 10, 12, 12, 14, 28, 32, 21,
+ 24, 25, 28, 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, 24, 28, 30, 34,
+ 64, 72, 39, 44, 47, 52, 42, 46, 48, 52, 106, 114, 63, 68, 71, 76},
+ /*params.padding=*/luci::Padding::VALID, /*stride_height=*/2, /*stride_width=*/2);
+
+ SUCCEED();
+}
+
+TEST(TransposeConvTest, UInt8)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::vector<float> input_data{1, 2, 3, 4};
+ std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
+ std::vector<float> bias_data{3, 4};
+ std::vector<int32_t> output_shape_data{1, 5, 5, 2};
+ std::vector<float> ref_output_data{
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ };
+
+ // Choose quantization parameters carefully.
+ auto input_quant = quantizationParams<uint8_t>(-8.0, 7.9375); // s = 1 / 16, zp = 128
+ auto filter_quant = quantizationParams<uint8_t>(-24.0, 39.75); // s = 1 / 4, zp = 96
+ auto output_quant = quantizationParams<uint8_t>(-64.0, 191.0); // s = 1, zp = 64
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 2, 1}, input_quant.first, input_quant.second, input_data, memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::U8>(
+ {2, 3, 3, 1}, filter_quant.first, filter_quant.second, filter_data, memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>({2}, input_quant.first * filter_quant.first,
+ 0, bias_data, memory_manager.get());
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second);
+
+ DataType scratch_data_type =
+ input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
+ TransposeConvParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(TransposeConvTest, UInt8_CWQ)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ const int32_t output_channels = 2;
+ std::vector<float> input_data{1, 2, 3, 4};
+ std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
+ std::vector<float> bias_data{3, 4};
+ std::vector<int32_t> output_shape_data{1, 5, 5, 2};
+ std::vector<float> ref_output_data{
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ };
+
+ // Choose quantization parameters carefully.
+ auto input_quant = quantizationParams<uint8_t>(-8.0, 7.9375); // s = 1 / 16, zp = 128
+ auto output_quant = quantizationParams<uint8_t>(-64.0, 191.0); // s = 1, zp = 64
+
+ std::vector<std::pair<float, int32_t>> filter_quant_params;
+ filter_quant_params.push_back(quantizationParams<uint8_t>(0, 17));
+ filter_quant_params.push_back(quantizationParams<uint8_t>(0, 18));
+
+ std::vector<float> filter_scales;
+ std::vector<int32_t> filter_zerops;
+ for (auto iter : filter_quant_params)
+ {
+ filter_scales.push_back(iter.first);
+ filter_zerops.push_back(iter.second);
+ }
+
+ std::vector<float> bias_scales;
+ for (int i = 0; i < output_channels; ++i)
+ bias_scales.push_back(filter_quant_params[i].first * input_quant.first);
+ std::vector<int32_t> zerop(output_channels, 0);
+
+ Tensor input_tensor = makeInputTensor<DataType::U8>(
+ {1, 2, 2, 1}, input_quant.first, input_quant.second, input_data, memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::U8>(
+ {output_channels, 3, 3, 1}, filter_scales, filter_zerops, 0, filter_data, memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S32>({output_channels}, bias_scales, zerop, 0,
+ bias_data, memory_manager.get());
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second);
+
+ DataType scratch_data_type =
+ input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
+ TransposeConvParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(TransposeConvTest, SInt16)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ std::vector<float> input_data{1, 2, 3, 4};
+ std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
+ std::vector<float> bias_data{3, 4};
+ std::vector<int32_t> output_shape_data{1, 5, 5, 2};
+ std::vector<float> ref_output_data{
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ };
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>({1, 2, 2, 1}, 0.25, 0, input_data, memory_manager.get());
+ Tensor filter_tensor =
+ makeInputTensor<DataType::S16>({2, 3, 3, 1}, 0.2, 0, filter_data, memory_manager.get());
+ Tensor bias_tensor =
+ makeInputTensor<DataType::S64>({2}, 0.25 * 0.2, 0, bias_data, memory_manager.get());
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0);
+
+ DataType scratch_data_type =
+ input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
+ TransposeConvParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+TEST(TransposeConvTest, SInt16_CWQ_weights)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+
+ const int output_channels = 2;
+ const Shape input_shape{1, 2, 2, 1};
+ const Shape filter_shape{output_channels, 3, 3, 1};
+ const Shape bias_shape{output_channels};
+ std::vector<int32_t> output_shape_data{1, 5, 5, output_channels};
+
+ std::vector<float> input_data{1, 2, 3, 4};
+ std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18};
+ std::vector<float> bias_data{3, 4};
+
+ std::vector<float> ref_output_data{
+ 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, //
+ 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, //
+ 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, //
+ 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, //
+ 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, //
+ };
+
+ const float input_scale = 0.25;
+ const float output_scale = 0.5;
+ const std::vector<float> filter_scales{0.2f, 0.5f};
+ std::vector<float> bias_scales{filter_scales[0] * input_scale, filter_scales[1] * input_scale};
+ const std::vector<int32_t> zerop(2, 0);
+
+ Tensor input_tensor =
+ makeInputTensor<DataType::S16>(input_shape, input_scale, 0, input_data, memory_manager.get());
+ Tensor filter_tensor = makeInputTensor<DataType::S16>(filter_shape, filter_scales, zerop, 0,
+ filter_data, memory_manager.get());
+ Tensor bias_tensor = makeInputTensor<DataType::S64>(bias_shape, bias_scales, zerop, 0, bias_data,
+ memory_manager.get());
+ Tensor output_shape_tensor =
+ makeInputTensor<DataType::S32>({4}, output_shape_data, memory_manager.get());
+ Tensor output_tensor = makeOutputTensor(DataType::S16, output_scale, 0);
+
+ DataType scratch_data_type =
+ input_tensor.element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+ Tensor scratch_tensor(scratch_data_type, Shape({}), {}, "");
+
+ TransposeConvParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+
+ TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor,
+ &output_tensor, &scratch_tensor, params);
+ kernel.configure();
+ memory_manager->allocate_memory(output_tensor);
+ memory_manager->allocate_memory(scratch_tensor);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data));
+ EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.cpp
new file mode 100644
index 000000000..9127241c0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Unpack.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Unpack::Unpack(const Tensor *input, std::vector<Tensor *> outputs, const UnpackParams &params)
+ : KernelWithParams<UnpackParams>({input}, std::move(outputs), params)
+{
+}
+
+void Unpack::configure()
+{
+ const Shape &input_shape = input()->shape();
+
+ int axis = _params.axis;
+ if (axis < 0)
+ axis += input()->shape().num_dims();
+ assert(axis >= 0 && axis < input_shape.num_dims());
+
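+  // Each output drops the unpacked axis. Illustrative example: input shape
+  // {3, 2} with axis = 0 produces 3 outputs of shape {2}.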
+ Shape output_shape(input_shape.num_dims() - 1);
+ int out_index = 0;
+ for (int in_index = 0; in_index < input_shape.num_dims(); ++in_index)
+ {
+ if (in_index != axis)
+ output_shape.dim(out_index++) = input_shape.dim(in_index);
+ }
+
+ for (Tensor *output : _outputs)
+ {
+ assert(output->element_type() == input()->element_type());
+ output->resize(output_shape);
+ }
+}
+
+template <typename T> void Unpack::executeImpl() const
+{
+ tflite::UnpackParams params{};
+ params.axis = _params.axis;
+ params.num_split = _outputs.size();
+ VectorOfTensors<T, false> all_outputs(_outputs);
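+  // All outputs have the same shape, so the reference kernel takes a single
+  // output shape (hence dereferencing the first element of shapes() below).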
+ tflite::reference_ops::Unpack<T>(params, getTensorShape(input()), getTensorData<T>(input()),
+ **all_outputs.shapes(), all_outputs.data());
+}
+
+void Unpack::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ return executeImpl<float>();
+ case DataType::U8:
+ return executeImpl<uint8_t>();
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.h b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.h
new file mode 100644
index 000000000..f4a44ecad
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_UNPACK_H
+#define LUCI_INTERPRETER_KERNELS_UNPACK_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Unpack : public KernelWithParams<UnpackParams>
+{
+public:
+ Unpack(const Tensor *input, std::vector<Tensor *> outputs, const UnpackParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output(int index) const { return _outputs[index]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void executeImpl() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_UNPACK_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.test.cpp
new file mode 100644
index 000000000..9384ddc83
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Unpack.test.cpp
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Unpack.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(int axis, Shape input_shape, std::initializer_list<T> input_data,
+ const std::vector<std::initializer_list<int32_t>> &exp_output_shape,
+ std::vector<std::initializer_list<T>> exp_output_data)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ constexpr DataType element_type = getElementType<T>();
+ const int num_outputs = input_shape.dim(axis < 0 ? axis + input_shape.num_dims() : axis);
+
+ Tensor input_tensor =
+ makeInputTensor<element_type>(input_shape, input_data, memory_manager.get());
+ std::vector<Tensor> output_tensors;
+ output_tensors.reserve(num_outputs);
+ for (int i = 0; i < num_outputs; ++i)
+ {
+ output_tensors.push_back(makeOutputTensor(element_type));
+ }
+
+ std::vector<Tensor *> output_tensor_ptrs(num_outputs);
+ for (int i = 0; i < num_outputs; ++i)
+ {
+ output_tensor_ptrs[i] = &output_tensors[i];
+ }
+
+ UnpackParams params{};
+ params.axis = axis;
+
+ Unpack kernel(&input_tensor, std::move(output_tensor_ptrs), params);
+ kernel.configure();
+ for (int i = 0; i < num_outputs; i++)
+ {
+ memory_manager->allocate_memory(output_tensors[i]);
+ }
+ kernel.execute();
+
+ for (int i = 0; i < num_outputs; ++i)
+ {
+ EXPECT_THAT(extractTensorData<T>(output_tensors[i]),
+ ::testing::ElementsAreArray(exp_output_data[i]));
+ }
+}
+
+template <typename T> class UnpackTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_SUITE(UnpackTest, DataTypes);
+
+TYPED_TEST(UnpackTest, ThreeOutputs)
+{
+ Check<TypeParam>(/*axis=*/0, /*input_shape=*/{3, 2},
+ /*input_data=*/{1, 2, 3, 4, 5, 6},
+ /*exp_output_shape=*/{{2}, {2}, {2}},
+ /*exp_output_data=*/{{1, 2}, {3, 4}, {5, 6}});
+}
+
+TYPED_TEST(UnpackTest, ThreeOutputsAxisOne)
+{
+ Check<TypeParam>(/*axis=*/1, /*input_shape=*/{3, 2},
+ /*input_data=*/{1, 2, 3, 4, 5, 6},
+ /*exp_output_shape=*/{{3}, {3}},
+ /*exp_output_data=*/{{1, 3, 5}, {2, 4, 6}});
+}
+
+TYPED_TEST(UnpackTest, ThreeOutputsNegativeAxisOne)
+{
+ Check<TypeParam>(/*axis=*/-1, /*input_shape=*/{3, 2},
+ /*input_data=*/{1, 2, 3, 4, 5, 6},
+ /*exp_output_shape=*/{{3}, {3}},
+ /*exp_output_data=*/{{1, 3, 5}, {2, 4, 6}});
+}
+
+TYPED_TEST(UnpackTest, ThreeOutputsNegativeAxisTwo)
+{
+ Check<TypeParam>(/*axis=*/-2, /*input_shape=*/{3, 2},
+ /*input_data=*/{1, 2, 3, 4, 5, 6},
+ /*exp_output_shape=*/{{2}, {2}, {2}},
+ /*exp_output_data=*/{{1, 2}, {3, 4}, {5, 6}});
+}
+
+TYPED_TEST(UnpackTest, OneOutput)
+{
+ Check<TypeParam>(/*axis=*/0, /*input_shape=*/{1, 6},
+ /*input_data=*/{1, 2, 3, 4, 5, 6},
+ /*exp_output_shape=*/{{6}},
+ /*exp_output_data=*/{{1, 2, 3, 4, 5, 6}});
+}
+
+TYPED_TEST(UnpackTest, ThreeDimensionsTwoOutputs)
+{
+ Check<TypeParam>(/*axis=*/2, /*input_shape=*/{2, 2, 2},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8},
+ /*exp_output_shape=*/{{2, 2}, {2, 2}},
+ /*exp_output_data=*/{{1, 3, 5, 7}, {2, 4, 6, 8}});
+}
+
+TYPED_TEST(UnpackTest, FiveDimensionsTwoOutputs)
+{
+ Check<TypeParam>(
+ /*axis=*/2, /*input_shape=*/{2, 2, 2, 2, 1},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ /*exp_output_shape=*/{{2, 2, 2, 1}, {2, 2, 2, 1}},
+ /*exp_output_data=*/
+ {{1, 2, 5, 6, 9, 10, 13, 14}, {3, 4, 7, 8, 11, 12, 15, 16}});
+}
+
+TYPED_TEST(UnpackTest, VectorToScalar)
+{
+ Check<TypeParam>(/*axis=*/0, /*input_shape=*/{5},
+ /*input_data=*/{1, 2, 3, 4, 5},
+ /*exp_output_shape=*/{{}, {}, {}, {}, {}},
+ /*exp_output_data=*/{{1}, {2}, {3}, {4}, {5}});
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Utils.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/Utils.cpp
new file mode 100644
index 000000000..5d8e5db83
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Utils.cpp
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Utils.h"
+
+#include <cassert>
+#include <cmath>
+#include <limits>
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+template <typename T>
+void calculateActivationRange(Activation activation, T *activation_min, T *activation_max)
+{
+ switch (activation)
+ {
+ case Activation::NONE:
+ *activation_min = std::numeric_limits<T>::lowest();
+ *activation_max = std::numeric_limits<T>::max();
+ break;
+ case Activation::RELU:
+ *activation_min = 0;
+ *activation_max = std::numeric_limits<T>::max();
+ break;
+ case Activation::RELU_N1_TO_1:
+ *activation_min = -1;
+ *activation_max = 1;
+ break;
+ case Activation::RELU6:
+ *activation_min = 0;
+ *activation_max = 6;
+ break;
+ default:
+ throw std::runtime_error("Unsupported activation.");
+ }
+}
+
+template void calculateActivationRange(Activation activation, float *activation_min,
+ float *activation_max);
+template void calculateActivationRange(Activation activation, int32_t *activation_min,
+ int32_t *activation_max);
+template void calculateActivationRange(Activation activation, int64_t *activation_min,
+ int64_t *activation_max);
+
+static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax,
+ const Tensor *output, int32_t *activation_min,
+ int32_t *activation_max)
+{
+ const float scale = output->scale();
+ const int32_t zero_point = output->zero_point();
+
+ auto quantize = [scale, zero_point](float x) {
+ return zero_point + static_cast<int32_t>(std::round(x / scale));
+ };
+
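+  // Illustrative numbers: with scale = 0.5 and zero_point = 0, RELU6 clamps to
+  // [max(qmin, 0), min(qmax, 12)], since quantize(6.0f) = 0 + round(6.0 / 0.5).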
+ switch (activation)
+ {
+ case Activation::NONE:
+ case Activation::TANH:
+ *activation_min = qmin;
+ *activation_max = qmax;
+ break;
+ case Activation::RELU:
+ *activation_min = std::max(qmin, quantize(0.0f));
+ *activation_max = qmax;
+ break;
+ case Activation::RELU_N1_TO_1:
+ *activation_min = std::max(qmin, quantize(-1.0f));
+ *activation_max = std::min(qmax, quantize(1.0f));
+ break;
+ case Activation::RELU6:
+ *activation_min = std::max(qmin, quantize(0.0f));
+ *activation_max = std::min(qmax, quantize(6.0f));
+ break;
+ default:
+ throw std::runtime_error("Unsupported activation.");
+ }
+}
+
+void calculateActivationRangeQuantized(Activation activation, const Tensor *output,
+ int32_t *activation_min, int32_t *activation_max)
+{
+ assert(output->zero_points().size() == 1);
+ int32_t qmin{};
+ int32_t qmax{};
+ switch (output->element_type())
+ {
+ case DataType::U8:
+ qmin = 0;
+ qmax = std::numeric_limits<uint8_t>::max();
+ break;
+ case DataType::S8:
+ qmin = -std::numeric_limits<int8_t>::max();
+ qmax = std::numeric_limits<int8_t>::max();
+ break;
+ case DataType::S16:
+ // For now, assume that signed int16 type implies signed symmetric quantization.
+ assert(output->zero_point() == 0);
+ qmin = -std::numeric_limits<int16_t>::max();
+ qmax = std::numeric_limits<int16_t>::max();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+
+ calculateActivationRangeQuantizedImpl(activation, qmin, qmax, output, activation_min,
+ activation_max);
+}
+
+void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
+{
+ if (double_multiplier == 0.0)
+ {
+ *quantized_multiplier = 0;
+ *shift = 0;
+ return;
+ }
+
+ const double q = std::frexp(double_multiplier, shift);
+ auto q_fixed = static_cast<int64_t>(std::round(q * (INT64_C(1) << 31)));
+
+ if (q_fixed == (INT64_C(1) << 31))
+ {
+ q_fixed /= 2;
+ ++*shift;
+ }
+ assert(q_fixed <= std::numeric_limits<int32_t>::max());
+ // A shift amount smaller than -31 would cause all bits to be shifted out
+ // and thus all results would be zero. We implement that instead with
+ // q_fixed==0, so as to avoid hitting issues with right-shift
+ // operations with shift amounts greater than 31. Note that this happens
+ // roughly when abs(double_multiplier) < 2^-31 and the present handling means
+ // that we're effectively flushing tiny double_multiplier's to zero.
+ // We could conceivably handle values in the range (roughly) [32, 63]
+ // as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
+ // the present handling is just doing 'flush denormals to zero'. We could
+ // reconsider and actually generate nonzero denormals if a need arises.
+ if (*shift < -31)
+ {
+ *shift = 0;
+ q_fixed = 0;
+ }
+ *quantized_multiplier = static_cast<int32_t>(q_fixed);
+}
+
+void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
+ int *left_shift)
+{
+ assert(double_multiplier < 1.0);
+ assert(double_multiplier > 0.0);
+ int shift;
+ quantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
+ assert(shift <= 0);
+ *left_shift = shift;
+}
+
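+// Illustrative example: broadcasting {2, 1, 5} against {3, 1} aligns the shapes
+// from the trailing dimension and yields {2, 3, 5}; a mismatch where neither
+// dimension is 1 (e.g. {2, 3} vs {2, 4}) fails the LUCI_INTERPRETER_CHECK below.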
+Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_shape)
+{
+ const int num_input1_dims = input1_shape.num_dims();
+ const int num_input2_dims = input2_shape.num_dims();
+ const int num_out_dims = std::max(num_input1_dims, num_input2_dims);
+ Shape output_shape(num_out_dims);
+
+ for (int i = 0; i < num_out_dims; ++i)
+ {
+ const int32_t input1_dim = i < num_input1_dims ? input1_shape.dim(num_input1_dims - i - 1) : 1;
+ const int32_t input2_dim = i < num_input2_dims ? input2_shape.dim(num_input2_dims - i - 1) : 1;
+
+ bool need_broadcast = input1_dim != input2_dim;
+ bool can_broadcast = input1_dim == 1 || input2_dim == 1;
+ LUCI_INTERPRETER_CHECK(!need_broadcast || can_broadcast);
+
+ output_shape.dim(num_out_dims - i - 1) = std::max(input1_dim, input2_dim);
+ }
+
+ return output_shape;
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/Utils.h b/compiler/luci-micro/luci-interpreter/src/kernels/Utils.h
new file mode 100644
index 000000000..ebeb20e66
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/Utils.h
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_UTILS_H
+#define LUCI_INTERPRETER_KERNELS_UTILS_H
+
+#include "core/KernelParams.h"
+#include "luci_interpreter/core/Tensor.h"
+
+#include <tensorflow/lite/kernels/internal/types.h>
+
+#include <cassert>
+#include <cstdint>
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+#define LUCI_INTERPRETER_CHECK(cond) \
+ if (!(cond)) \
+    throw std::runtime_error(std::string(__FILE__) + ":" + std::to_string(__LINE__) + " (" +      \
+                             std::string(#cond) + ") was not true.");
+
+inline int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size,
+ int32_t filter_size, int32_t out_size)
+{
+ const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+ const int32_t padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2;
+ return padding > 0 ? padding : 0;
+}
+
+inline int32_t computePaddingWithOffset(int32_t stride, int32_t dilation_rate, int32_t in_size,
+ int32_t filter_size, int32_t out_size, int32_t *offset)
+{
+ int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+ int32_t total_padding = ((out_size - 1) * stride + effective_filter_size - in_size);
+ total_padding = total_padding > 0 ? total_padding : 0;
+ *offset = total_padding % 2;
+ return total_padding / 2;
+}
+
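+// Illustrative numbers: image_size = 5, filter_size = 3, stride = 2 gives
+// SAME -> (5 + 2 - 1) / 2 = 3 and VALID -> (5 + 2 - 3) / 2 = 2.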
+inline int32_t computeOutputSize(Padding padding, int32_t image_size, int32_t filter_size,
+ int32_t stride, int32_t dilation_rate = 1)
+{
+ const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+ switch (padding)
+ {
+ case Padding::SAME:
+ return (image_size + stride - 1) / stride;
+ case Padding::VALID:
+ return (image_size + stride - effective_filter_size) / stride;
+ default:
+ assert(false);
+ return 0;
+ }
+}
+
+inline int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2, int32_t d3)
+{
+ return ((d0 * shape.dim(1) + d1) * shape.dim(2) + d2) * shape.dim(3) + d3;
+}
+
+template <typename T>
+void calculateActivationRange(Activation activation, T *activation_min, T *activation_max);
+
+void calculateActivationRangeQuantized(Activation activation, const Tensor *output,
+ int32_t *activation_min, int32_t *activation_max);
+
+template <typename T> constexpr bool one_of_types() { return false; }
+
+// Checks if T is equal to one of {U,Other} types
+template <typename T, typename U, typename... Other> constexpr bool one_of_types()
+{
+ return std::is_same<T, U>::value || one_of_types<T, Other...>();
+}
+
+/**
+ * Fills the activation min and max parameters depending on the given data type and activation.
+ *
+ * T is a template parameter, so after optimization only the branch required for T remains.
+ *
+ * @tparam T data type of the arithmetic operation's output tensor
+ * @param p tflite params to fill
+ * @param act luci_interpreter::Activation of the arithmetic operation
+ */
+template <typename T>
+void fillArithmeticActivationRange(tflite::ArithmeticParams &p, Activation act)
+{
+ static_assert(one_of_types<T, float, int32_t, int64_t>(), "Unsupported dtype");
+
+  // The branches are chained so that the float case does not also fill the
+  // int64 range through the trailing 'else'.
+  if (std::is_same<T, float>::value)
+    calculateActivationRange(act, &p.float_activation_min, &p.float_activation_max);
+  else if (std::is_same<T, int32_t>::value)
+    calculateActivationRange(act, &p.quantized_activation_min, &p.quantized_activation_max);
+  else
+    calculateActivationRange(act, &p.int64_activation_min, &p.int64_activation_max);
+}
+
+// Decompose a double multiplier into a Q0.31 int32 representation of its
+// significand, and shift representation of its exponent.
+//
+// Handles an arbitrary positive multiplier. The 'shift' output-value is
+// basically the 'floating-point exponent' of the multiplier:
+// Negative for a right-shift (when the multiplier is <1), positive for a
+// left-shift (when the multiplier is >1)
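+//
+// Illustrative numbers: 0.75 is exactly 0.75 * 2^0, so quantized_multiplier =
+// round(0.75 * 2^31) = 1610612736 and shift = 0; 1.5 = 0.75 * 2^1 yields the
+// same multiplier with shift = 1.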
+void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift);
+
+// Decompose a double multiplier into a Q0.31 int32 representation of its
+// significand, and a shift representing the NEGATIVE of its exponent ---
+// i.e. the result is meant to be used as a RIGHT shift.
+//
+// Restricted to the case where the multiplier < 1 (and non-negative).
+void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
+ int *left_shift);
+
+Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_shape);
+
+inline double getQuantizedConvolutionMultipler(float input_scale, float filter_scale,
+ float output_scale)
+{
+ const double input_product_scale = static_cast<double>(input_scale * filter_scale);
+ LUCI_INTERPRETER_CHECK(input_product_scale >= 0);
+ return input_product_scale / static_cast<double>(output_scale);
+}
+
+// TODO: rename getQuantizedConvolutionMultiplers to something more general,
+// since it is used for non-conv operators too
+inline std::vector<double> getQuantizedConvolutionMultiplers(float input_scale,
+ const std::vector<float> &filter_scale,
+ float output_scale)
+{
+ std::vector<double> effective_output_scales;
+ size_t n = filter_scale.size();
+ effective_output_scales.reserve(n);
+ for (size_t i = 0; i < n; ++i)
+ {
+ effective_output_scales.push_back(
+ getQuantizedConvolutionMultipler(input_scale, filter_scale[i], output_scale));
+ }
+ return effective_output_scales;
+}
+
+struct ChannelQuantMultipliers
+{
+ int shift;
+ int32_t multiplier;
+ ChannelQuantMultipliers() = default;
+};
+
+inline std::vector<ChannelQuantMultipliers>
+quantizeMultipliers(const std::vector<double> &effective_scale)
+{
+ size_t n = effective_scale.size();
+ std::vector<ChannelQuantMultipliers> params(n);
+ for (size_t i = 0; i < n; ++i)
+ {
+ quantizeMultiplier(effective_scale[i], &params[i].multiplier, &params[i].shift);
+ }
+ return params;
+}
+
+// Helper wrapper to hide broadcast logic
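+// A size-1 vector is read with stride 0, so a single per-tensor multiplier can
+// be indexed as if it were replicated across all channels, e.g. with
+// BroadcastableWrapper<int> w(v); w[5] returns v[0] when v.size() == 1.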
+template <typename T> class BroadcastableWrapper
+{
+public:
+ BroadcastableWrapper(const std::vector<T> &v) : _v(v), _stride(v.size() == 1 ? 0 : 1) {}
+
+ T operator[](int idx) { return _v[idx * _stride]; }
+
+private:
+ const std::vector<T> &_v;
+ int _stride;
+};
+
+inline tflite::RuntimeShape getTensorShape(const Tensor *tensor)
+{
+ if (tensor == nullptr)
+ return tflite::RuntimeShape();
+
+ const Shape &shape = tensor->shape();
+ tflite::RuntimeShape runtime_shape(shape.num_dims());
+ for (int i = 0; i < shape.num_dims(); ++i)
+ {
+ runtime_shape.SetDim(i, shape.dim(i));
+ }
+ return runtime_shape;
+}
+
+template <typename T> const T *getTensorData(const Tensor *tensor)
+{
+ return tensor != nullptr ? tensor->data<T>() : nullptr;
+}
+
+template <typename T> T *getTensorData(Tensor *tensor)
+{
+ return tensor != nullptr ? tensor->data<T>() : nullptr;
+}
+
+// A list of tensors in a format that can be used by kernels like split and
+// concatenation.
+template <typename T, bool is_const> class VectorOfTensors
+{
+public:
+ using ElementT = typename std::conditional<is_const, const T, T>::type;
+ using TensorT = typename std::conditional<is_const, const Tensor, Tensor>::type;
+
+ // Build with the tensors in 'tensor_list'.
+ explicit VectorOfTensors(const std::vector<TensorT *> &tensor_list)
+ {
+ const int num_tensors = tensor_list.size();
+
+ all_data_.reserve(num_tensors);
+ all_shape_.reserve(num_tensors);
+ all_shape_ptr_.reserve(num_tensors);
+
+ for (TensorT *tensor : tensor_list)
+ {
+ all_data_.push_back(getTensorData<T>(tensor));
+ all_shape_.push_back(getTensorShape(tensor));
+ }
+
+    // Taking a pointer to an element inside a std::vector is only OK if the
+    // vector is never modified afterwards, so all_shape_ is fully populated in
+    // the previous loop before any pointers into it are taken here.
+ for (tflite::RuntimeShape &shape : all_shape_)
+ {
+ all_shape_ptr_.push_back(&shape);
+ }
+ }
+ // Return a pointer to the data pointers of all tensors in the list. For
+ // example:
+ // float* const* f = v.data();
+ // f[0][1] is the second element of the first tensor.
+ ElementT *const *data() const { return all_data_.data(); }
+
+  // Return a pointer to the shape pointers of all tensors in the list. For
+  // example:
+  //   const RuntimeShape* const* s = v.shapes();
+  //   s[1] points to the dimensions of the second tensor in the list.
+ const tflite::RuntimeShape *const *shapes() const { return all_shape_ptr_.data(); }
+
+private:
+ std::vector<ElementT *> all_data_;
+ std::vector<tflite::RuntimeShape> all_shape_;
+ std::vector<tflite::RuntimeShape *> all_shape_ptr_;
+};
+
+// A list of quantized tensors in a format that can be used by kernels like
+// split and concatenation.
+template <bool is_const> class VectorOfQuantizedTensors : public VectorOfTensors<uint8_t, is_const>
+{
+public:
+ using typename VectorOfTensors<uint8_t, is_const>::TensorT;
+
+ // Build with the tensors in 'tensor_list'.
+ explicit VectorOfQuantizedTensors(const std::vector<TensorT *> &tensor_list)
+ : VectorOfTensors<uint8_t, is_const>(tensor_list)
+ {
+ for (TensorT *tensor : tensor_list)
+ {
+ zero_point_.push_back(tensor->zero_point());
+ scale_.push_back(tensor->scale());
+ }
+ }
+
+ const float *scale() const { return scale_.data(); }
+ const int32_t *zero_point() const { return zero_point_.data(); }
+
+private:
+ std::vector<int32_t> zero_point_;
+ std::vector<float> scale_;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_UTILS_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/While.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/While.cpp
new file mode 100644
index 000000000..153bd1a99
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/While.cpp
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/While.h"
+#include "kernels/Utils.h"
+
+#include <cstring>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+namespace
+{
+
+void copy(const std::vector<const Tensor *> &src, const std::vector<Tensor *> &dst)
+{
+ for (size_t i = 0; i < src.size(); ++i)
+ {
+ LUCI_INTERPRETER_CHECK(dst[i]->element_type() == src[i]->element_type());
+ dst[i]->resize(src[i]->shape());
+
+ const int32_t num_elements = src[i]->shape().num_elements();
+ const std::size_t element_size = getDataTypeSize(src[i]->element_type());
+ std::memcpy(dst[i]->data<void>(), src[i]->data<void>(), num_elements * element_size);
+ }
+}
+
+void copy(const std::vector<Tensor *> &src, const std::vector<Tensor *> &dst)
+{
+ std::vector<const Tensor *> const_src;
+ for (const auto &t : src)
+ const_src.push_back(t);
+ copy(const_src, dst);
+}
+
+// TODO: Think about how to allocate memory for the outputs in the main graph
+void configureTensorsAllocations(const std::vector<Tensor *> &tensors, RuntimeGraph *run_graph)
+{
+ for (auto tensor : tensors)
+ run_graph->configureAllocations(tensor);
+}
+
+} // namespace
+
+While::While(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs,
+ RuntimeGraph *cond_graph, RuntimeGraph *body_graph)
+ : Kernel(std::move(inputs), std::move(outputs)), _cond_graph(cond_graph), _body_graph(body_graph)
+{
+}
+
+void While::configure()
+{
+ LUCI_INTERPRETER_CHECK(_body_graph->getInputTensors().size() == getInputTensors().size());
+ LUCI_INTERPRETER_CHECK(_body_graph->getOutputTensors().size() == getOutputTensors().size());
+ LUCI_INTERPRETER_CHECK(_body_graph->getOutputTensors().size() == getInputTensors().size());
+
+ LUCI_INTERPRETER_CHECK(_cond_graph->getInputTensors().size() == getInputTensors().size());
+
+ const auto &cond_outputs = _cond_graph->getOutputTensors();
+  LUCI_INTERPRETER_CHECK(cond_outputs.size() == 1);
+ LUCI_INTERPRETER_CHECK(cond_outputs[0]->element_type() == DataType::BOOL);
+}
+
+/**
+ * @note Dynamic shapes such as {1, 0, 8} may cause tensor->data() to fail
+ */
+void While::execute() const
+{
+ const auto &cond_inputs = _cond_graph->getInputTensors();
+ const auto &cond_outputs = _cond_graph->getOutputTensors();
+
+ configureTensorsAllocations(cond_inputs, _cond_graph);
+
+ copy(getInputTensors(), cond_inputs);
+
+ const auto &body_inputs = _body_graph->getInputTensors();
+ const auto &body_outputs = _body_graph->getOutputTensors();
+
+ configureTensorsAllocations(body_inputs, _body_graph);
+
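+  // Dataflow per iteration: the cond inputs hold the loop-carried values; run
+  // the cond graph, stop when it yields false, otherwise feed the values to the
+  // body graph and copy the body outputs back into the cond inputs.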
+ while (true)
+ {
+ _cond_graph->execute();
+
+ bool cond_value = cond_outputs[0]->data<bool>()[0];
+ if (!cond_value)
+ break;
+
+ copy(cond_inputs, body_inputs);
+
+ _body_graph->execute();
+
+ copy(body_outputs, cond_inputs);
+ }
+
+ copy(cond_inputs, getOutputTensors());
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/While.h b/compiler/luci-micro/luci-interpreter/src/kernels/While.h
new file mode 100644
index 000000000..f758df3f3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/While.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_WHILE_H
+#define LUCI_INTERPRETER_KERNELS_WHILE_H
+
+#include "core/Kernel.h"
+#include "core/RuntimeGraph.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class While : public Kernel
+{
+public:
+ While(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs, RuntimeGraph *cond_graph,
+ RuntimeGraph *body_graph);
+
+ const Tensor *input(int index) const { return _inputs[index]; }
+ Tensor *output(int index) const { return _outputs[index]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ RuntimeGraph *const _cond_graph = nullptr;
+ RuntimeGraph *const _body_graph = nullptr;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_WHILE_H
diff --git a/compiler/luci-micro/luci-interpreter/src/kernels/While.test.cpp b/compiler/luci-micro/luci-interpreter/src/kernels/While.test.cpp
new file mode 100644
index 000000000..cb8f89130
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/kernels/While.test.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "core/RuntimeModule.h"
+#include "kernels/Add.h"
+#include "kernels/Less.h"
+#include "kernels/While.h"
+#include "kernels/TestUtils.h"
+#include "luci_interpreter/TestMemoryManager.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+RuntimeGraph *buildCondSubgraph(RuntimeModule *module, DataType dtype, Tensor *input_cond,
+ IMemoryManager *memory_manager)
+{
+ RuntimeGraph *graph = module->addGraph(memory_manager);
+ Tensor *input =
+ graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, ""));
+ Tensor *output =
+ graph->addTensor(std::make_unique<Tensor>(DataType::BOOL, Shape{}, AffineQuantization{}, ""));
+
+ memory_manager->allocate_memory(*input);
+ memory_manager->allocate_memory(*output);
+
+ graph->setInputTensors({input});
+ graph->setOutputTensors({output});
+
+ graph->addKernel(std::make_unique<Less>(input, input_cond, output));
+
+ return graph;
+}
+
+RuntimeGraph *buildBodySubgraph(RuntimeModule *module, DataType dtype, Tensor *input_add,
+ IMemoryManager *memory_manager)
+{
+ RuntimeGraph *graph = module->addGraph(memory_manager);
+ Tensor *input =
+ graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, ""));
+ Tensor *output =
+ graph->addTensor(std::make_unique<Tensor>(dtype, Shape{}, AffineQuantization{}, ""));
+
+ memory_manager->allocate_memory(*input);
+ memory_manager->allocate_memory(*output);
+
+ graph->setInputTensors({input});
+ graph->setOutputTensors({output});
+
+ AddParams params{};
+ params.activation = Activation::NONE;
+ graph->addKernel(std::make_unique<Add>(input, input_add, output, params));
+
+ return graph;
+}
+
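+// Together, the two subgraphs implement, in effect, "while (x < 10) x = x + 1":
+// starting from x = 1, the loop in the test below is expected to produce 10.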
+TEST(WhileTest, FloatLoop10)
+{
+ std::unique_ptr<IMemoryManager> memory_manager = std::make_unique<TestMemoryManager>();
+ Tensor input = makeInputTensor<DataType::FLOAT32>({1}, {1}, memory_manager.get());
+ Tensor output = makeOutputTensor(DataType::FLOAT32);
+
+ Tensor input_cond = makeInputTensor<DataType::FLOAT32>({1}, {10}, memory_manager.get());
+ Tensor input_add = makeInputTensor<DataType::FLOAT32>({1}, {1}, memory_manager.get());
+
+ RuntimeModule module(nullptr);
+ RuntimeGraph *cond_graph =
+ buildCondSubgraph(&module, DataType::FLOAT32, &input_cond, memory_manager.get());
+ RuntimeGraph *body_graph =
+ buildBodySubgraph(&module, DataType::FLOAT32, &input_add, memory_manager.get());
+
+ While kernel({&input}, {&output}, cond_graph, body_graph);
+ kernel.configure();
+ memory_manager->allocate_memory(output);
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({10}));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/CMakeLists.txt b/compiler/luci-micro/luci-interpreter/src/loader/CMakeLists.txt
new file mode 100644
index 000000000..292771592
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/CMakeLists.txt
@@ -0,0 +1,39 @@
+set(SOURCES
+ GraphLoader.h
+ GraphLoader.cpp
+ KernelBuilderHelper.h
+ KernelBuilderHelper.cpp
+ KernelBuilder.h
+ KernelBuilder.cpp
+ ModuleLoader.h
+ ModuleLoader.cpp
+ RuntimeToIR.h
+ nodes/Builders.h)
+
+# include kernel specific builders
+macro(REGISTER_KERNEL NODE)
+ list(APPEND SOURCES "nodes/${NODE}.cpp")
+endmacro(REGISTER_KERNEL)
+include(${KERNEL_REGISTER_FILE})
+
+add_library(${LUCI_INTERPRETER_LOADER} STATIC ${SOURCES})
+if (NOT NNCC_LIBRARY_NO_PIC)
+ set_target_properties(${LUCI_INTERPRETER_LOADER} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+endif(NOT NNCC_LIBRARY_NO_PIC)
+target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_PAL_DIR}")
+target_include_directories(${LUCI_INTERPRETER_LOADER} PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
+
+target_link_libraries(${LUCI_INTERPRETER_LOADER}
+ PUBLIC luci_lang ${LUCI_INTERPRETER_CORE}
+ PRIVATE ${LUCI_INTERPRETER_KERNELS} nncc_common luci_plan)
+
+if(NOT ENABLE_TEST)
+ return()
+endif(NOT ENABLE_TEST)
+
+nnas_find_package(GTest REQUIRED)
+
+set(TEST_SOURCES KernelBuilder.test.cpp)
+
+GTest_AddTest(${LUCI_INTERPRETER_LOADER}_test ${TEST_SOURCES})
+target_link_libraries(${LUCI_INTERPRETER_LOADER}_test ${LUCI_INTERPRETER_LOADER})
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.cpp b/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.cpp
new file mode 100644
index 000000000..40207090b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.cpp
@@ -0,0 +1,344 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loader/GraphLoader.h"
+
+#include "loader/KernelBuilder.h"
+
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+#include <loco/IR/Algorithm.h>
+
+namespace luci_interpreter
+{
+namespace
+{
+
+template <typename NodeT> Shape getNodeShape(const NodeT *node)
+{
+ Shape shape(node->rank());
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ {
+ shape.dim(i) = node->dim(i).value();
+ }
+ return shape;
+}
+
+template <DataType DT> const void *getNodeDataImpl(const luci::CircleConst *node, size_t *data_size)
+{
+ const size_t element_size = getDataTypeSize(DT);
+ const int32_t num_elements = node->size<DT>();
+
+ *data_size = num_elements * element_size;
+ if (*data_size > 0)
+ {
+ // FIXME There is currently no good way to get a pointer to the data.
+ return &node->at<DT>(0);
+ }
+ return nullptr;
+}
+
+const void *getNodeData(const luci::CircleConst *node, size_t *data_size)
+{
+ switch (node->dtype())
+ {
+ case DataType::U8:
+ return getNodeDataImpl<DataType::U8>(node, data_size);
+ case DataType::FLOAT32:
+ return getNodeDataImpl<DataType::FLOAT32>(node, data_size);
+ case DataType::S8:
+ return getNodeDataImpl<DataType::S8>(node, data_size);
+ case DataType::S16:
+ return getNodeDataImpl<DataType::S16>(node, data_size);
+ case DataType::S32:
+ return getNodeDataImpl<DataType::S32>(node, data_size);
+ case DataType::S64:
+ return getNodeDataImpl<DataType::S64>(node, data_size);
+ case DataType::BOOL:
+ return getNodeDataImpl<DataType::BOOL>(node, data_size);
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+const void *getNodeData(const luci::CircleCustom *node, size_t *data_size)
+{
+ if (node->custom_code() != "CircleReferencingConst")
+ return nullptr;
+
+ // Helper struct describing the data stored in the custom_options of a
+ // CircleReferencingConst node.
+ // TODO move this struct to a header
+ struct ConstDataReference
+ {
+ const uint8_t *data = nullptr;
+ uint32_t size = 0;
+ };
+
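+ // custom_options is assumed to hold a raw ConstDataReference blob placed there
+ // when the node was created; reinterpret it to recover the data pointer and size.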
+ const auto &custom_options = node->custom_options();
+ const auto &const_data_ref = *reinterpret_cast<const ConstDataReference *>(custom_options.data());
+
+ *data_size = const_data_ref.size;
+ return const_data_ref.data;
+}
+
+bool isExecutableNode(const luci::CircleNode *node)
+{
+ switch (node->opcode())
+ {
+ // These nodes denote inputs / outputs of a graph.
+ case luci::CircleOpcode::CIRCLECONST:
+ case luci::CircleOpcode::CIRCLEINPUT:
+ case luci::CircleOpcode::CIRCLEOUTPUT:
+ case luci::CircleOpcode::CIRCLEOUTPUTEXCLUDE:
+ // The following nodes denote outputs of multiple-output nodes.
+ case luci::CircleOpcode::CIRCLEBIDIRECTIONAL_SEQUENCE_LSTM_OUT:
+ case luci::CircleOpcode::CIRCLECUSTOMOUT:
+ case luci::CircleOpcode::CIRCLEIFOUT:
+ case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV4OUT:
+ case luci::CircleOpcode::CIRCLENONMAXSUPPRESSIONV5OUT:
+ case luci::CircleOpcode::CIRCLESPLITOUT:
+ case luci::CircleOpcode::CIRCLESPLITVOUT:
+ case luci::CircleOpcode::CIRCLETOPKV2OUT:
+ case luci::CircleOpcode::CIRCLEUNIQUEOUT:
+ case luci::CircleOpcode::CIRCLEUNPACKOUT:
+ case luci::CircleOpcode::CIRCLEVARIABLE:
+ case luci::CircleOpcode::CIRCLEWHILEOUT:
+ return false;
+ // Custom nodes may be either executable or non-executable
+ case luci::CircleOpcode::CUSTOM:
+ {
+ auto const custom_node = loco::must_cast<const luci::CircleCustom *>(node);
+
+ // TODO handle more non-executable Custom ops here
+ if (custom_node->custom_code() == "CircleReferencingConst")
+ return false;
+
+ return true;
+ }
+ default:
+ return true;
+ }
+}
+
+bool isTensorProducingNode(const luci::CircleNode *node)
+{
+ switch (node->opcode())
+ {
+ // Output nodes do not produce tensors.
+ case luci::CircleOpcode::CIRCLEOUTPUT:
+ // The following nodes are multiple-output nodes. They do not produce tensors
+ // themselves; the tensors are produced by the corresponding *Out nodes instead.
+ case luci::CircleOpcode::BIDIRECTIONAL_SEQUENCE_LSTM:
+ case luci::CircleOpcode::CUSTOM:
+ case luci::CircleOpcode::IF:
+ case luci::CircleOpcode::NON_MAX_SUPPRESSION_V4:
+ case luci::CircleOpcode::NON_MAX_SUPPRESSION_V5:
+ case luci::CircleOpcode::SPLIT:
+ case luci::CircleOpcode::SPLIT_V:
+ case luci::CircleOpcode::TOPK_V2:
+ case luci::CircleOpcode::UNIQUE:
+ case luci::CircleOpcode::UNPACK:
+ case luci::CircleOpcode::WHILE:
+ return false;
+ default:
+ return true;
+ }
+}
+
+bool isSupportedCustomNode(const luci::CircleNode *node)
+{
+ const auto custom_node = loco::must_cast<const luci::CircleCustom *>(node);
+
+ // TODO handle more Custom ops here
+ if (custom_node->custom_code() == "CircleReferencingConst")
+ return true;
+
+ return false;
+}
+
+} // namespace
+
+GraphLoader::GraphLoader(
+ const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor, IMemoryManager *memory_manager)
+ : _graph(graph), _runtime_graph(runtime_graph), _runtime_to_ir(runtime_to_ir),
+ _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor),
+ _memory_manager(memory_manager)
+{
+}
+
+void GraphLoader::loadTensors()
+{
+ for (uint32_t i = 0; i < _graph->nodes()->size(); ++i)
+ {
+ const auto *node = loco::must_cast<const luci::CircleNode *>(_graph->nodes()->at(i));
+
+ if (node->opcode() == luci::CircleOpcode::CUSTOM && !isSupportedCustomNode(node))
+ throw std::runtime_error("Unsupported Custom operator. " + node->name());
+
+ if (!isTensorProducingNode(node))
+ continue;
+
+ // Only Input, Const, Custom and Variable nodes have shapes. Shapes of intermediate tensors will
+ // be inferred.
+ Shape shape{};
+ switch (node->opcode())
+ {
+ case luci::CircleOpcode::CIRCLECONST:
+ case luci::CircleOpcode::CIRCLECUSTOMOUT:
+ case luci::CircleOpcode::CIRCLEINPUT:
+ case luci::CircleOpcode::CIRCLEVARIABLE:
+ shape = getNodeShape(node);
+ break;
+ default:
+ break;
+ }
+
+ AffineQuantization quantization;
+ if (node->quantparam() != nullptr)
+ {
+ const luci::CircleQuantParam *params = node->quantparam();
+ assert(params->scale.size() == params->zerop.size());
+ quantization.scale.assign(params->scale.cbegin(), params->scale.cend());
+ quantization.zero_point.assign(params->zerop.cbegin(), params->zerop.cend());
+ quantization.quantized_dimension = params->quantized_dimension;
+ }
+
+ auto tensor = std::make_unique<Tensor>(node->dtype(), std::move(shape), std::move(quantization),
+ node->name());
+
+ // If the node has an execution plan, read the memory offset of its tensor
+ // from the beginning of the shared memory buffer. Used by the Static Memory Manager.
+ if (luci::has_execution_plan(node))
+ {
+ auto execution_plan = luci::get_execution_plan(node);
+ assert(!execution_plan.offsets().empty());
+ tensor->set_offset(execution_plan.offsets().front());
+ }
+
+ if (const auto *const_node = dynamic_cast<const luci::CircleConst *>(node))
+ {
+ size_t data_size{};
+ const void *const_data = getNodeData(const_node, &data_size);
+ if (const_data != nullptr)
+ {
+ _memory_manager->allocate_memory(*tensor);
+ tensor->writeData(const_data, data_size);
+ }
+ }
+ else if (const auto *custom_out_node = dynamic_cast<const luci::CircleCustomOut *>(node))
+ {
+ const auto *custom_node =
+ loco::must_cast<const luci::CircleCustom *>(custom_out_node->input());
+
+ if (custom_node->custom_code() == "CircleReferencingConst")
+ {
+ size_t data_size{};
+ const void *const_data = getNodeData(custom_node, &data_size);
+ if (const_data != nullptr)
+ {
+ _memory_manager->allocate_memory(*tensor);
+ tensor->writeData(const_data, data_size);
+ }
+ }
+ }
+
+ _node_to_tensor.emplace(node, tensor.get());
+ _runtime_to_ir.tensor_to_node.emplace(tensor.get(), node);
+
+ _runtime_graph->addTensor(std::move(tensor));
+ }
+}
+
+void GraphLoader::initInputOutputTensors() const
+{
+ auto input_nodes = loco::input_nodes(_graph);
+ std::vector<Tensor *> input_tensors(input_nodes.size());
+ for (size_t i = 0; i < input_nodes.size(); ++i)
+ {
+ input_tensors[i] = _node_to_tensor.at(input_nodes[i]);
+ _memory_manager->allocate_memory(*input_tensors[i]);
+ }
+ _runtime_graph->setInputTensors(input_tensors);
+
+ auto output_nodes = loco::output_nodes(const_cast<loco::Graph *>(_graph));
+ std::vector<Tensor *> output_tensors(output_nodes.size());
+ for (size_t i = 0; i < output_nodes.size(); ++i)
+ {
+ const auto *node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
+ output_tensors[i] = _node_to_tensor.at(node->from());
+ }
+ _runtime_graph->setOutputTensors(output_tensors);
+}
+
+void GraphLoader::loadOperators()
+{
+ KernelBuilder kernel_builder(_graph_to_runtime_graph, _node_to_tensor);
+
+ // Create kernels for executable nodes. This has to be done in execution order.
+ auto graph = const_cast<loco::Graph *>(_graph);
+
+ auto const graph_nodes = loco::all_nodes(graph);
+
+ // Check that every node carries an execution plan annotation.
+ bool has_execution_annotation = true;
+ auto const checking_exec_plan = [&has_execution_annotation](auto const node) {
+ const auto *circle_node = loco::must_cast<const luci::CircleNode *>(node);
+ if (!luci::has_execution_plan(circle_node))
+ has_execution_annotation = false;
+ };
+ std::for_each(begin(graph_nodes), end(graph_nodes), checking_exec_plan);
+
+ if (has_execution_annotation)
+ {
+ // Build the ordered_nodes vector, which stores the graph nodes in execution order.
+ std::vector<const luci::CircleNode *> ordered_nodes(graph_nodes.size());
+
+ auto const filler = [&ordered_nodes](auto const node) {
+ const auto *circle_node = loco::must_cast<const luci::CircleNode *>(node);
+ auto const position = luci::get_execution_plan(circle_node).order_in_plan();
+ ordered_nodes.at(position) = circle_node;
+ };
+ std::for_each(begin(graph_nodes), end(graph_nodes), filler);
+
+ for (auto node : ordered_nodes)
+ {
+ if (isExecutableNode(node))
+ {
+ std::unique_ptr<Kernel> kernel = kernel_builder.build(node);
+ _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node);
+ _runtime_graph->addKernel(std::move(kernel));
+ }
+ }
+ }
+ else
+ {
+ // If it is impossible to build an execution-order plan,
+ // fall back to the default postorder_traversal approach.
+ for (const loco::Node *loco_node : loco::postorder_traversal(loco::output_nodes(graph)))
+ {
+ const auto *node = loco::must_cast<const luci::CircleNode *>(loco_node);
+ if (isExecutableNode(node))
+ {
+ std::unique_ptr<Kernel> kernel = kernel_builder.build(node);
+ _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node);
+ _runtime_graph->addKernel(std::move(kernel));
+ }
+ }
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.h b/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.h
new file mode 100644
index 000000000..fe066ecf8
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/GraphLoader.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
+#define LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
+
+#include "core/RuntimeGraph.h"
+#include "loader/RuntimeToIR.h"
+#include "luci_interpreter/MemoryManager.h"
+
+#include <loco/IR/Graph.h>
+
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class GraphLoader
+{
+public:
+ GraphLoader(const loco::Graph *graph, RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor,
+ IMemoryManager *memory_manager);
+
+ void loadTensors();
+ void initInputOutputTensors() const;
+ void loadOperators();
+
+private:
+ const loco::Graph *_graph;
+ RuntimeGraph *_runtime_graph;
+ RuntimeToIR &_runtime_to_ir;
+ IMemoryManager *_memory_manager;
+
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &_graph_to_runtime_graph;
+ std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.cpp b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.cpp
new file mode 100644
index 000000000..8483a9a3d
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loader/KernelBuilder.h"
+#include "loader/nodes/Builders.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+#define CIRCLE_NODE(OPCODE, CLASS) CLASS,
+#define CIRCLE_VNODE(OPCODE, CLASS) CLASS,
+
+// This enum is auxiliary: it duplicates luci::CircleOpcode but is initialized
+// with CLASS instead of OPCODE, because the list of target operators is given
+// in terms of CLASS names.
+enum class BuilderId
+{
+#include <luci/IR/CircleNodes.lst>
+ Size // equals the number of values in the BuilderId enum
+};
+
+#undef CIRCLE_VNODE
+#undef CIRCLE_NODE
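+
+// For example (illustrative), an entry such as CIRCLE_NODE(ADD, CircleAdd) in
+// CircleNodes.lst would contribute the enumerator CircleAdd to BuilderId, at the
+// same position as luci::CircleOpcode::ADD.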
+
+/**
+ * @brief Registry of kernel builders
+ *
+ * This class contains the mapping from opcodes to kernel builder functions.
+ */
+class KernelBuilderRegistry
+{
+public:
+ using KernelBuilderFunc = std::unique_ptr<Kernel>(const luci::CircleNode *,
+ KernelBuilderHelper &);
+
+ KernelBuilderRegistry() : _operator_builders(size_t(BuilderId::Size), nullptr)
+ {
+#define REGISTER_KERNEL(name) \
+ register_kernel_builder(BuilderId::Circle##name, build_kernel_Circle##name);
+
+#include "KernelsToBuild.lst"
+
+#undef REGISTER_KERNEL
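+
+ // For example (illustrative), a line REGISTER_KERNEL(Add) in KernelsToBuild.lst
+ // expands to:
+ //   register_kernel_builder(BuilderId::CircleAdd, build_kernel_CircleAdd);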
+ }
+
+ KernelBuilderFunc *get_kernel_builder_func(luci::CircleOpcode opcode) const
+ {
+ return _operator_builders.at(size_t(opcode));
+ }
+
+private:
+ std::vector<KernelBuilderFunc *> _operator_builders;
+
+ void register_kernel_builder(BuilderId id, KernelBuilderFunc *func)
+ {
+ // Since BuilderId duplicates luci::CircleOpcode, size_t(id) is equal to
+ // size_t(corresponding operation opcode).
+ assert(size_t(id) < _operator_builders.size());
+ _operator_builders[size_t(id)] = func;
+ }
+};
+
+KernelBuilder::KernelBuilder(
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
+ : KernelBuilderHelper(graph_to_runtime_graph, node_to_tensor)
+{
+ _builder_registry = std::make_unique<KernelBuilderRegistry>();
+}
+
+KernelBuilder::~KernelBuilder()
+{
+ // The destructor must be defined in this .cpp file to hide the
+ // KernelBuilderRegistry internals; it deletes _builder_registry.
+}
+
+std::unique_ptr<Kernel> KernelBuilder::build(const luci::CircleNode *node)
+{
+ auto specific_builder = _builder_registry->get_kernel_builder_func(node->opcode());
+ if (specific_builder != nullptr)
+ return specific_builder(node, *this);
+
+ std::string msg = "Unsupported operator: ";
+ msg += std::to_string(static_cast<uint32_t>(node->opcode())) + " " + std::string(node->name());
+ throw std::invalid_argument(msg.c_str());
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.h b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.h
new file mode 100644
index 000000000..b1f383394
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_KERNELBUILDER_H
+#define LUCI_INTERPRETER_LOADER_KERNELBUILDER_H
+
+#include "loader/KernelBuilderHelper.h"
+
+#include "core/Kernel.h"
+#include "core/RuntimeGraph.h"
+
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <memory>
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class KernelBuilderRegistry;
+
+class KernelBuilder : public KernelBuilderHelper
+{
+public:
+ KernelBuilder(
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor);
+
+ ~KernelBuilder();
+
+ std::unique_ptr<Kernel> build(const luci::CircleNode *node);
+
+private:
+ std::unique_ptr<KernelBuilderRegistry> _builder_registry;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_KERNELBUILDER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.test.cpp b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.test.cpp
new file mode 100644
index 000000000..b221b6921
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilder.test.cpp
@@ -0,0 +1,1376 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loader/GraphLoader.h"
+#include "loader/KernelBuilder.h"
+#include "luci_interpreter/SimpleMemoryManager.h"
+
+#include <kernels/Add.h>
+#include <kernels/ArgMax.h>
+#include <kernels/AveragePool2D.h>
+#include <kernels/BatchMatMul.h>
+#include <kernels/Cast.h>
+#include <kernels/Concatenation.h>
+#include <kernels/Conv2D.h>
+#include <kernels/DepthToSpace.h>
+#include <kernels/DepthwiseConv2D.h>
+#include <kernels/Div.h>
+#include <kernels/Elu.h>
+#include <kernels/Exp.h>
+#include <kernels/Floor.h>
+#include <kernels/FloorDiv.h>
+#include <kernels/Equal.h>
+#include <kernels/FullyConnected.h>
+#include <kernels/Greater.h>
+#include <kernels/GreaterEqual.h>
+#include <kernels/InstanceNorm.h>
+#include <kernels/L2Normalize.h>
+#include <kernels/L2Pool2D.h>
+#include <kernels/LeakyRelu.h>
+#include <kernels/Less.h>
+#include <kernels/LessEqual.h>
+#include <kernels/LocalResponseNormalization.h>
+#include <kernels/LogicalAnd.h>
+#include <kernels/LogicalNot.h>
+#include <kernels/LogicalOr.h>
+#include <kernels/Logistic.h>
+#include <kernels/LogSoftmax.h>
+#include <kernels/Maximum.h>
+#include <kernels/MaxPool2D.h>
+#include <kernels/Mean.h>
+#include <kernels/Minimum.h>
+#include <kernels/Mul.h>
+#include <kernels/Neg.h>
+#include <kernels/NotEqual.h>
+#include <kernels/OneHot.h>
+#include <kernels/Pad.h>
+#include <kernels/PadV2.h>
+#include <kernels/Pow.h>
+#include <kernels/PRelu.h>
+#include <kernels/Relu.h>
+#include <kernels/Relu6.h>
+#include <kernels/Reshape.h>
+#include <kernels/ResizeBilinear.h>
+#include <kernels/ResizeNearestNeighbor.h>
+#include <kernels/ReverseV2.h>
+#include <kernels/Rsqrt.h>
+#include <kernels/Slice.h>
+#include <kernels/Softmax.h>
+#include <kernels/SpaceToDepth.h>
+#include <kernels/Split.h>
+#include <kernels/SplitV.h>
+#include <kernels/Sqrt.h>
+#include <kernels/SquaredDifference.h>
+#include <kernels/Squeeze.h>
+#include <kernels/StridedSlice.h>
+#include <kernels/Sub.h>
+#include <kernels/Tanh.h>
+#include <kernels/Transpose.h>
+#include <kernels/TransposeConv.h>
+#include <kernels/Unpack.h>
+
+#include <gmock/gmock.h>
+
+namespace luci_interpreter
+{
+namespace
+{
+
+using namespace testing;
+
+class KernelBuilderTest : public Test
+{
+protected:
+ luci::CircleInput *createInputNode() { return createNode<luci::CircleInput>(); }
+ void SetUp() override { _memory_manager = std::make_unique<SimpleMemoryManager>(); }
+
+ std::unique_ptr<IMemoryManager> _memory_manager;
+
+ template <typename NodeT, typename... Args> NodeT *createNode(Args &&... args)
+ {
+ auto *node = _graph.nodes()->create<NodeT>(std::forward<Args>(args)...);
+ // The actual type does not matter for the purpose of the tests.
+ // NOTE The type is meaningless for nodes with multiple outputs (corresponding *Out nodes carry
+ // actual output types).
+ node->dtype(loco::DataType::FLOAT32);
+ return node;
+ }
+
+ template <typename NodeOutT> NodeOutT *createNodeOut(loco::Node *node, int index)
+ {
+ auto *node_out = createNode<NodeOutT>();
+ node_out->input(node);
+ node_out->index(index);
+ return node_out;
+ }
+
+ template <typename KernelT> std::unique_ptr<KernelT> buildKernel(const luci::CircleNode *op)
+ {
+ std::unordered_map<const loco::Graph *, RuntimeGraph *> graph_to_runtime_graph;
+
+ RuntimeGraph runtime_graph(nullptr, _memory_manager.get());
+ graph_to_runtime_graph[&_graph] = &runtime_graph;
+ RuntimeToIR runtime_to_ir;
+ GraphLoader graph_loader(&_graph, &runtime_graph, runtime_to_ir, graph_to_runtime_graph,
+ _node_to_tensor, _memory_manager.get());
+ graph_loader.loadTensors();
+
+ KernelBuilder kernel_builder(graph_to_runtime_graph, _node_to_tensor);
+
+ auto kernel = kernel_builder.build(op);
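+ // Downcast to the concrete kernel type under test. NOTE if the dynamic_cast
+ // fails, the pointer released from `kernel` leaks; this is tolerated in tests.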
+ return std::unique_ptr<KernelT>(dynamic_cast<KernelT *>(kernel.release()));
+ }
+
+ void checkTensor(const Tensor *tensor, const loco::Node *node)
+ {
+ EXPECT_THAT(tensor, Eq(_node_to_tensor.at(node)));
+ }
+
+private:
+ loco::Graph _graph;
+ std::unordered_map<const loco::Node *, Tensor *> _node_to_tensor;
+};
+
+TEST_F(KernelBuilderTest, Add)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleAdd>();
+ op->x(input1);
+ op->y(input2);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::Add>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, ArgMax)
+{
+ auto *input = createInputNode();
+ auto *axis = createInputNode();
+
+ auto *op = createNode<luci::CircleArgMax>();
+ op->input(input);
+ op->dimension(axis);
+
+ op->output_type(loco::DataType::FLOAT32);
+
+ auto kernel = buildKernel<kernels::ArgMax>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->axis(), axis);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().output_type, Eq(op->output_type()));
+}
+
+TEST_F(KernelBuilderTest, AveragePool2D)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleAveragePool2D>();
+ op->value(input);
+
+ op->padding(luci::Padding::SAME);
+ op->filter()->h(11);
+ op->filter()->w(13);
+ op->stride()->h(17);
+ op->stride()->w(19);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::AveragePool2D>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().filter_height, Eq(op->filter()->h()));
+ EXPECT_THAT(kernel->params().filter_width, Eq(op->filter()->w()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, BatchMatMul)
+{
+ auto *lhs = createInputNode();
+ auto *rhs = createInputNode();
+
+ auto *op = createNode<luci::CircleBatchMatMul>();
+ op->x(lhs);
+ op->y(rhs);
+ op->adj_x(false);
+ op->adj_y(false);
+
+ auto kernel = buildKernel<kernels::BatchMatMul>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), lhs);
+ checkTensor(kernel->y(), rhs);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().adj_x, Eq(op->adj_x()));
+ EXPECT_THAT(kernel->params().adj_y, Eq(op->adj_y()));
+}
+
+TEST_F(KernelBuilderTest, Cast)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleCast>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Cast>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Concatenation)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleConcatenation>(2);
+ op->values(0, input1);
+ op->values(1, input2);
+ op->axis(11);
+
+ auto kernel = buildKernel<kernels::Concatenation>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(0), input1);
+ checkTensor(kernel->input(1), input2);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().axis, Eq(op->axis()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Conv2D)
+{
+ auto *input = createInputNode();
+ auto *filter = createInputNode();
+ auto *bias = createInputNode();
+
+ auto *op = createNode<luci::CircleConv2D>();
+ op->input(input);
+ op->filter(filter);
+ op->bias(bias);
+
+ op->padding(luci::Padding::SAME);
+ op->stride()->h(11);
+ op->stride()->w(13);
+ op->dilation()->h(17);
+ op->dilation()->w(19);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::Conv2D>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->filter(), filter);
+ checkTensor(kernel->bias(), bias);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+ EXPECT_THAT(kernel->params().dilation_height_factor, Eq(op->dilation()->h()));
+ EXPECT_THAT(kernel->params().dilation_width_factor, Eq(op->dilation()->w()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, DepthToSpace)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleDepthToSpace>();
+ op->input(input);
+
+ op->block_size(11);
+
+ auto kernel = buildKernel<kernels::DepthToSpace>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().block_size, Eq(op->block_size()));
+}
+
+TEST_F(KernelBuilderTest, DepthwiseConv2D)
+{
+ auto *input = createInputNode();
+ auto *filter = createInputNode();
+ auto *bias = createInputNode();
+
+ auto *op = createNode<luci::CircleDepthwiseConv2D>();
+ op->input(input);
+ op->filter(filter);
+ op->bias(bias);
+
+ op->padding(luci::Padding::SAME);
+ op->depthMultiplier(11);
+ op->stride()->h(13);
+ op->stride()->w(17);
+ op->dilation()->h(19);
+ op->dilation()->w(23);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::DepthwiseConv2D>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->filter(), filter);
+ checkTensor(kernel->bias(), bias);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().depth_multiplier, Eq(op->depthMultiplier()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+ EXPECT_THAT(kernel->params().dilation_height_factor, Eq(op->dilation()->h()));
+ EXPECT_THAT(kernel->params().dilation_width_factor, Eq(op->dilation()->w()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Div)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleDiv>();
+ op->x(input1);
+ op->y(input2);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::Div>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Elu)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleElu>();
+ op->features(input);
+
+ auto kernel = buildKernel<kernels::Elu>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Exp)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleExp>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Exp>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Floor)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleFloor>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Floor>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, FloorDiv)
+{
+ auto *x = createInputNode();
+ auto *y = createInputNode();
+
+ auto *op = createNode<luci::CircleFloorDiv>();
+ op->x(x);
+ op->y(y);
+
+ auto kernel = buildKernel<kernels::FloorDiv>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x);
+ checkTensor(kernel->y(), y);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Equal)
+{
+ auto *x_input = createInputNode();
+ auto *y_input = createInputNode();
+
+ auto *op = createNode<luci::CircleEqual>();
+ op->x(x_input);
+ op->y(y_input);
+
+ auto kernel = buildKernel<kernels::Equal>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x_input);
+ checkTensor(kernel->y(), y_input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, FullyConnected)
+{
+ auto *input = createInputNode();
+ auto *weights = createInputNode();
+ auto *bias = createInputNode();
+
+ auto *op = createNode<luci::CircleFullyConnected>();
+ op->input(input);
+ op->weights(weights);
+ op->bias(bias);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::FullyConnected>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->weights(), weights);
+ checkTensor(kernel->bias(), bias);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Greater)
+{
+ auto *x_input = createInputNode();
+ auto *y_input = createInputNode();
+
+ auto *op = createNode<luci::CircleGreater>();
+ op->x(x_input);
+ op->y(y_input);
+
+ auto kernel = buildKernel<kernels::Greater>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x_input);
+ checkTensor(kernel->y(), y_input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, GreaterEqual)
+{
+ auto *x_input = createInputNode();
+ auto *y_input = createInputNode();
+
+ auto *op = createNode<luci::CircleGreaterEqual>();
+ op->x(x_input);
+ op->y(y_input);
+
+ auto kernel = buildKernel<kernels::GreaterEqual>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x_input);
+ checkTensor(kernel->y(), y_input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, InstanceNorm)
+{
+ auto *input = createInputNode();
+ auto *gamma = createInputNode();
+ auto *beta = createInputNode();
+
+ auto *op = createNode<luci::CircleInstanceNorm>();
+ op->input(input);
+ op->gamma(gamma);
+ op->beta(beta);
+
+ op->epsilon(1e-05);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::InstanceNorm>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->gamma(), gamma);
+ checkTensor(kernel->beta(), beta);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().epsilon, Eq(op->epsilon()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, L2Normalize)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleL2Normalize>();
+ op->x(input);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::L2Normalize>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, L2Pool2D)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleL2Pool2D>();
+ op->value(input);
+
+ op->padding(luci::Padding::SAME);
+ op->filter()->h(11);
+ op->filter()->w(13);
+ op->stride()->h(17);
+ op->stride()->w(19);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::L2Pool2D>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().filter_height, Eq(op->filter()->h()));
+ EXPECT_THAT(kernel->params().filter_width, Eq(op->filter()->w()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, LeakyRelu)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleLeakyRelu>();
+ op->features(input);
+
+ op->alpha(11.0f);
+
+ auto kernel = buildKernel<kernels::LeakyRelu>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().alpha, Eq(op->alpha()));
+}
+
+TEST_F(KernelBuilderTest, Less)
+{
+ auto *x_input = createInputNode();
+ auto *y_input = createInputNode();
+
+ auto *op = createNode<luci::CircleLess>();
+ op->x(x_input);
+ op->y(y_input);
+
+ auto kernel = buildKernel<kernels::Less>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x_input);
+ checkTensor(kernel->y(), y_input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, LessEqual)
+{
+ auto *x_input = createInputNode();
+ auto *y_input = createInputNode();
+
+ auto *op = createNode<luci::CircleLessEqual>();
+ op->x(x_input);
+ op->y(y_input);
+
+ auto kernel = buildKernel<kernels::LessEqual>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x_input);
+ checkTensor(kernel->y(), y_input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, LocalResponseNormalization)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleLocalResponseNormalization>();
+ op->input(input);
+
+ op->radius(11);
+ op->bias(13.0f);
+ op->alpha(15.0f);
+ op->beta(17.0f);
+
+ auto kernel = buildKernel<kernels::LocalResponseNormalization>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().radius, Eq(op->radius()));
+ EXPECT_THAT(kernel->params().bias, Eq(op->bias()));
+ EXPECT_THAT(kernel->params().alpha, Eq(op->alpha()));
+ EXPECT_THAT(kernel->params().beta, Eq(op->beta()));
+}
+
+TEST_F(KernelBuilderTest, LogicalAnd)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleLogicalAnd>();
+ op->x(input1);
+ op->y(input2);
+
+ auto kernel = buildKernel<kernels::LogicalAnd>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, LogicalNot)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleLogicalNot>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::LogicalNot>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, LogicalOr)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleLogicalOr>();
+ op->x(input1);
+ op->y(input2);
+
+ auto kernel = buildKernel<kernels::LogicalOr>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Logistic)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleLogistic>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Logistic>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, LogSoftmax)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleLogSoftmax>();
+ op->logits(input);
+
+ auto kernel = buildKernel<kernels::LogSoftmax>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Maximum)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleMaximum>();
+ op->x(input1);
+ op->y(input2);
+
+ auto kernel = buildKernel<kernels::Maximum>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, MaxPool2D)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleMaxPool2D>();
+ op->value(input);
+
+ op->padding(luci::Padding::SAME);
+ op->filter()->h(11);
+ op->filter()->w(13);
+ op->stride()->h(17);
+ op->stride()->w(19);
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::MaxPool2D>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().filter_height, Eq(op->filter()->h()));
+ EXPECT_THAT(kernel->params().filter_width, Eq(op->filter()->w()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Mean)
+{
+ auto *input = createInputNode();
+ auto *axes = createInputNode();
+
+ auto *op = createNode<luci::CircleMean>();
+ op->input(input);
+ op->reduction_indices(axes);
+
+ op->keep_dims(true);
+
+ auto kernel = buildKernel<kernels::Mean>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->axes(), axes);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().keep_dims, Eq(op->keep_dims()));
+}
+
+TEST_F(KernelBuilderTest, Minimum)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleMinimum>();
+ op->x(input1);
+ op->y(input2);
+
+ auto kernel = buildKernel<kernels::Minimum>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Mul)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleMul>();
+ op->x(input1);
+ op->y(input2);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::Mul>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Neg)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleNeg>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Neg>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, NotEqual)
+{
+ auto *x_input = createInputNode();
+ auto *y_input = createInputNode();
+
+ auto *op = createNode<luci::CircleNotEqual>();
+ op->x(x_input);
+ op->y(y_input);
+
+ auto kernel = buildKernel<kernels::NotEqual>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->x(), x_input);
+ checkTensor(kernel->y(), y_input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, OneHot)
+{
+ auto *indices = createInputNode();
+ auto *depth = createInputNode();
+ auto *on_value = createInputNode();
+ auto *off_value = createInputNode();
+ auto axis = 1;
+
+ auto *op = createNode<luci::CircleOneHot>();
+ op->indices(indices);
+ op->depth(depth);
+ op->on_value(on_value);
+ op->off_value(off_value);
+ op->axis(axis);
+
+ auto kernel = buildKernel<kernels::OneHot>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->indices(), indices);
+ checkTensor(kernel->depth(), depth);
+ checkTensor(kernel->on_value(), on_value);
+ checkTensor(kernel->off_value(), off_value);
+ EXPECT_THAT(kernel->params().axis, Eq(op->axis()));
+}
+
+TEST_F(KernelBuilderTest, Pad)
+{
+ auto *input = createInputNode();
+ auto *paddings = createInputNode();
+
+ auto *op = createNode<luci::CirclePad>();
+ op->input(input);
+ op->paddings(paddings);
+
+ auto kernel = buildKernel<kernels::Pad>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->paddings(), paddings);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, PadV2)
+{
+ auto *input = createInputNode();
+ auto *paddings = createInputNode();
+ auto *constant_values = createInputNode();
+
+ auto *op = createNode<luci::CirclePadV2>();
+ op->input(input);
+ op->paddings(paddings);
+ op->constant_values(constant_values);
+
+ auto kernel = buildKernel<kernels::PadV2>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->paddings(), paddings);
+ checkTensor(kernel->constant_values(), constant_values);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Pow)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CirclePow>();
+ op->x(input1);
+ op->y(input2);
+
+ auto kernel = buildKernel<kernels::Pow>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, PRelu)
+{
+ auto *input = createInputNode();
+ auto *alpha = createInputNode();
+
+ auto *op = createNode<luci::CirclePRelu>();
+ op->input(input);
+ op->alpha(alpha);
+
+ auto kernel = buildKernel<kernels::PRelu>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->alpha(), alpha);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Relu)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleRelu>();
+ op->features(input);
+
+ auto kernel = buildKernel<kernels::Relu>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Relu6)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleRelu6>();
+ op->features(input);
+
+ auto kernel = buildKernel<kernels::Relu6>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Reshape)
+{
+ auto *input = createInputNode();
+ auto *shape = createInputNode();
+
+ auto *op = createNode<luci::CircleReshape>();
+ op->tensor(input);
+ op->shape(shape);
+
+ auto kernel = buildKernel<kernels::Reshape>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->shape(), shape);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, ResizeBilinear)
+{
+ auto *input = createInputNode();
+ auto *size = createInputNode();
+
+ auto *op = createNode<luci::CircleResizeBilinear>();
+ op->input(input);
+ op->size(size);
+ op->align_corners(true);
+ op->half_pixel_centers(true);
+
+ auto kernel = buildKernel<kernels::ResizeBilinear>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->size(), size);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().align_corners, Eq(op->align_corners()));
+ EXPECT_THAT(kernel->params().half_pixel_centers, Eq(op->half_pixel_centers()));
+}
+
+TEST_F(KernelBuilderTest, ResizeNearestNeighbor)
+{
+ auto *input = createInputNode();
+ auto *size = createInputNode();
+
+ auto *op = createNode<luci::CircleResizeNearestNeighbor>();
+ op->input(input);
+ op->size(size);
+ op->align_corners(true);
+
+ auto kernel = buildKernel<kernels::ResizeNearestNeighbor>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->size(), size);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().align_corners, Eq(op->align_corners()));
+ // TODO half_pixel_centers is currently not implemented for CircleResizeNearestNeighbor;
+ // update this test once it is added.
+}
+
+TEST_F(KernelBuilderTest, ReverseV2)
+{
+ auto *input = createInputNode();
+ auto *axes = createInputNode();
+
+ auto *op = createNode<luci::CircleReverseV2>();
+ op->tensor(input);
+ op->axis(axes);
+
+ auto kernel = buildKernel<kernels::ReverseV2>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->axes(), axes);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Rsqrt)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleRsqrt>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Rsqrt>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Slice)
+{
+ auto *input = createInputNode();
+ auto *begin = createInputNode();
+ auto *size = createInputNode();
+
+ auto *op = createNode<luci::CircleSlice>();
+ op->input(input);
+ op->begin(begin);
+ op->size(size);
+
+ auto kernel = buildKernel<kernels::Slice>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->begin(), begin);
+ checkTensor(kernel->size(), size);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Softmax)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleSoftmax>();
+ op->logits(input);
+
+ op->beta(11.0f);
+
+ auto kernel = buildKernel<kernels::Softmax>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().beta, Eq(op->beta()));
+}
+
+TEST_F(KernelBuilderTest, SpaceToDepth)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleSpaceToDepth>();
+ op->input(input);
+
+ op->block_size(11);
+
+ auto kernel = buildKernel<kernels::SpaceToDepth>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().block_size, Eq(op->block_size()));
+}
+
+TEST_F(KernelBuilderTest, Split)
+{
+ auto *axis = createInputNode();
+ auto *input = createInputNode();
+ auto *op = createNode<luci::CircleSplit>();
+ auto *output1 = createNodeOut<luci::CircleSplitOut>(op, 0);
+ auto *output2 = createNodeOut<luci::CircleSplitOut>(op, 1);
+
+ op->split_dim(axis);
+ op->input(input);
+
+ op->num_split(2);
+
+ auto kernel = buildKernel<kernels::Split>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->axis(), axis);
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(0), output1);
+ checkTensor(kernel->output(1), output2);
+}
+
+TEST_F(KernelBuilderTest, SplitV)
+{
+ auto *input = createInputNode();
+ auto *size_splits = createInputNode();
+ auto *axis = createInputNode();
+ auto *op = createNode<luci::CircleSplitV>();
+ auto *output0 = createNodeOut<luci::CircleSplitVOut>(op, 0);
+ auto *output1 = createNodeOut<luci::CircleSplitVOut>(op, 1);
+
+ op->input(input);
+ op->size_splits(size_splits);
+ op->split_dim(axis);
+
+ op->num_split(2);
+
+ auto kernel = buildKernel<kernels::SplitV>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->size_splits(), size_splits);
+ checkTensor(kernel->axis(), axis);
+ checkTensor(kernel->output(0), output0);
+ checkTensor(kernel->output(1), output1);
+}
+
+TEST_F(KernelBuilderTest, Sqrt)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleSqrt>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Sqrt>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, SquaredDifference)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleSquaredDifference>();
+ op->x(input1);
+ op->y(input2);
+
+ auto kernel = buildKernel<kernels::SquaredDifference>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Squeeze)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleSqueeze>();
+ op->input(input);
+
+ op->squeeze_dims({11, 13});
+
+ auto kernel = buildKernel<kernels::Squeeze>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().squeeze_dims, ElementsAreArray(op->squeeze_dims()));
+}
+
+TEST_F(KernelBuilderTest, StridedSlice)
+{
+ auto *input = createInputNode();
+ auto *begin = createInputNode();
+ auto *end = createInputNode();
+ auto *strides = createInputNode();
+
+ auto *op = createNode<luci::CircleStridedSlice>();
+ op->input(input);
+ op->begin(begin);
+ op->end(end);
+ op->strides(strides);
+
+ op->begin_mask(11);
+ op->ellipsis_mask(13);
+ op->end_mask(17);
+ op->new_axis_mask(19);
+ op->shrink_axis_mask(23);
+
+ auto kernel = buildKernel<kernels::StridedSlice>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->begin(), begin);
+ checkTensor(kernel->end(), end);
+ checkTensor(kernel->strides(), strides);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().begin_mask, Eq(op->begin_mask()));
+ EXPECT_THAT(kernel->params().ellipsis_mask, Eq(op->ellipsis_mask()));
+ EXPECT_THAT(kernel->params().end_mask, Eq(op->end_mask()));
+ EXPECT_THAT(kernel->params().new_axis_mask, Eq(op->new_axis_mask()));
+ EXPECT_THAT(kernel->params().shrink_axis_mask, Eq(op->shrink_axis_mask()));
+}
+
+TEST_F(KernelBuilderTest, Sub)
+{
+ auto *input1 = createInputNode();
+ auto *input2 = createInputNode();
+
+ auto *op = createNode<luci::CircleSub>();
+ op->x(input1);
+ op->y(input2);
+
+ op->fusedActivationFunction(luci::FusedActFunc::RELU);
+
+ auto kernel = buildKernel<kernels::Sub>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input1(), input1);
+ checkTensor(kernel->input2(), input2);
+ checkTensor(kernel->output(), op);
+ EXPECT_THAT(kernel->params().activation, Eq(op->fusedActivationFunction()));
+}
+
+TEST_F(KernelBuilderTest, Tanh)
+{
+ auto *input = createInputNode();
+
+ auto *op = createNode<luci::CircleTanh>();
+ op->x(input);
+
+ auto kernel = buildKernel<kernels::Tanh>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, Transpose)
+{
+ auto *input = createInputNode();
+ auto *perm = createInputNode();
+
+ auto *op = createNode<luci::CircleTranspose>();
+ op->a(input);
+ op->perm(perm);
+
+ auto kernel = buildKernel<kernels::Transpose>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->perm(), perm);
+ checkTensor(kernel->output(), op);
+}
+
+TEST_F(KernelBuilderTest, TransposeConv)
+{
+ auto *output_shape = createInputNode();
+ auto *filter = createInputNode();
+ auto *input = createInputNode();
+ auto *bias = createInputNode();
+
+ auto *op = createNode<luci::CircleTransposeConv>();
+ op->inputSizes(output_shape);
+ op->filter(filter);
+ op->outBackprop(input);
+ op->bias(bias);
+
+ op->padding(luci::Padding::SAME);
+ op->stride()->h(11);
+ op->stride()->w(13);
+
+ auto kernel = buildKernel<kernels::TransposeConv>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->output_shape(), output_shape);
+ checkTensor(kernel->filter(), filter);
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(), op);
+ checkTensor(kernel->bias(), bias);
+ EXPECT_THAT(kernel->params().padding, Eq(op->padding()));
+ EXPECT_THAT(kernel->params().stride_height, Eq(op->stride()->h()));
+ EXPECT_THAT(kernel->params().stride_width, Eq(op->stride()->w()));
+}
+
+TEST_F(KernelBuilderTest, Unpack)
+{
+ auto *input = createInputNode();
+ auto *op = createNode<luci::CircleUnpack>();
+ auto *output1 = createNodeOut<luci::CircleUnpackOut>(op, 0);
+ auto *output2 = createNodeOut<luci::CircleUnpackOut>(op, 1);
+
+ op->value(input);
+
+ op->num(2);
+ op->axis(11);
+
+ auto kernel = buildKernel<kernels::Unpack>(op);
+ ASSERT_THAT(kernel, NotNull());
+
+ checkTensor(kernel->input(), input);
+ checkTensor(kernel->output(0), output1);
+ checkTensor(kernel->output(1), output2);
+ EXPECT_THAT(kernel->params().axis, Eq(op->axis()));
+}
+
+TEST_F(KernelBuilderTest, NonExisting1_NEG)
+{
+ auto *op = createNode<luci::CircleConst>();
+ ASSERT_ANY_THROW(buildKernel<Kernel>(op));
+}
+
+TEST_F(KernelBuilderTest, NonExisting2_NEG)
+{
+ auto *op = createNode<luci::CircleInput>();
+ ASSERT_ANY_THROW(buildKernel<Kernel>(op));
+}
+
+TEST_F(KernelBuilderTest, NonExisting3_NEG)
+{
+ auto *op = createNode<luci::CircleOutput>();
+ ASSERT_ANY_THROW(buildKernel<Kernel>(op));
+}
+
+} // namespace
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.cpp b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.cpp
new file mode 100644
index 000000000..23c96a6db
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loader/KernelBuilderHelper.h"
+
+#include <luci/IR/Nodes/CircleOutput.h>
+
+namespace luci_interpreter
+{
+
+const Tensor *KernelBuilderHelper::getInputTensor(const loco::Node *node) const
+{
+ const Tensor *tensor = _node_to_tensor.at(node);
+ assert(tensor != nullptr);
+ return tensor;
+}
+
+const Tensor *KernelBuilderHelper::getOptionalInputTensor(const loco::Node *node) const
+{
+ if (dynamic_cast<const luci::CircleOutputExclude *>(node))
+ {
+ return nullptr;
+ }
+ return getInputTensor(node);
+}
+
+Tensor *KernelBuilderHelper::getOutputTensor(const loco::Node *node) const
+{
+ Tensor *tensor = _node_to_tensor.at(node);
+ assert(tensor != nullptr);
+ return tensor;
+}
+
+std::vector<Tensor *>
+KernelBuilderHelper::getOutputTensors(const std::vector<const loco::Node *> &nodes) const
+{
+ std::vector<Tensor *> tensors;
+ tensors.reserve(nodes.size());
+ for (const loco::Node *node : nodes)
+ tensors.push_back(getOutputTensor(node));
+ return tensors;
+}
+
+RuntimeGraph *KernelBuilderHelper::getRuntimeGraph(const loco::Graph *graph) const
+{
+ RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph);
+ assert(runtime_graph != nullptr);
+ return runtime_graph;
+}
+
+} // namespace luci_interpreter
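
For orientation, every per-node builder added by this patch consumes KernelBuilderHelper the same way: resolve the node's IR inputs to input Tensors, resolve the node itself to its output Tensor, then construct the kernel. A minimal sketch, assuming a hypothetical CircleAbs builder and a matching kernels::Abs kernel (the real builders below, e.g. build_kernel_CircleAdd, follow exactly this shape):

    std::unique_ptr<Kernel> build_kernel_CircleAbs(const luci::CircleNode *circle_node,
                                                   KernelBuilderHelper &helper)
    {
      // must_cast checks the dynamic type instead of silently accepting a wrong node.
      const auto *node = loco::must_cast<const luci::CircleAbs *>(circle_node);
      const Tensor *input = helper.getInputTensor(node->x()); // unordered_map::at throws if unmapped
      Tensor *output = helper.getOutputTensor(node);
      return std::make_unique<kernels::Abs>(input, output);
    }
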
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.h b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.h
new file mode 100644
index 000000000..d6fb253b1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/KernelBuilderHelper.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H
+#define LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H
+
+#include "core/Kernel.h"
+#include "core/RuntimeGraph.h"
+
+#include <loco/IR/Graph.h>
+#include <loco/IR/Node.h>
+
+#include <algorithm>
+#include <vector>
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class KernelBuilderHelper
+{
+public:
+ KernelBuilderHelper(
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph,
+ const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
+ : _graph_to_runtime_graph(graph_to_runtime_graph), _node_to_tensor(node_to_tensor)
+ {
+ }
+
+public:
+ const Tensor *getInputTensor(const loco::Node *node) const;
+ const Tensor *getOptionalInputTensor(const loco::Node *node) const;
+
+ Tensor *getOutputTensor(const loco::Node *node) const;
+ std::vector<Tensor *> getOutputTensors(const std::vector<const loco::Node *> &nodes) const;
+
+ RuntimeGraph *getRuntimeGraph(const loco::Graph *graph) const;
+
+public:
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &graph_to_runtime_graph() const
+ {
+ return _graph_to_runtime_graph;
+ }
+
+ const std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor() const
+ {
+ return _node_to_tensor;
+ }
+
+private:
+ const std::unordered_map<const loco::Graph *, RuntimeGraph *> &_graph_to_runtime_graph;
+ const std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
+};
+
+template <typename CircleNodeOut>
+std::vector<const loco::Node *> collectOutputNodes(const loco::Node *node)
+{
+ std::vector<const CircleNodeOut *> output_nodes;
+ for (const loco::Node *loco_node : loco::succs(node))
+ {
+ output_nodes.push_back(loco::must_cast<const CircleNodeOut *>(loco_node));
+ }
+ std::sort(output_nodes.begin(), output_nodes.end(),
+ [](const CircleNodeOut *node1, const CircleNodeOut *node2) {
+ return node1->index() < node2->index();
+ });
+ return {output_nodes.cbegin(), output_nodes.cend()};
+}
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_KERNELBUILDER_HELPER_H
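
collectOutputNodes recovers the ordered output list of a multi-output operator from its (unordered) graph successors. A short usage sketch for a multi-output operator such as CircleSplit, assuming `node` is the CircleSplit node being built (the If builder later in this patch applies the same pattern to CircleIfOut):

    auto output_nodes = collectOutputNodes<luci::CircleSplitOut>(node);
    std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
    // After the sort by index(), outputs[i] is the tensor for output slot i.
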
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.cpp b/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.cpp
new file mode 100644
index 000000000..2f278b087
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModuleLoader.h"
+
+#include "GraphLoader.h"
+
+namespace luci_interpreter
+{
+
+ModuleLoader::ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module,
+ RuntimeToIR &runtime_to_ir,
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor,
+ IMemoryManager *memory_manager)
+ // Keep the initializer list in declaration order (see ModuleLoader.h) to avoid -Wreorder.
+ : _memory_manager(memory_manager), _module(module), _runtime_module(runtime_module),
+ _runtime_to_ir(runtime_to_ir), _node_to_tensor(node_to_tensor)
+{
+}
+
+void ModuleLoader::load()
+{
+ // Runtime graphs have to be created in advance, because they will be needed during the loading
+ // process for control flow nodes.
+ for (size_t i = 0; i < _module->size(); ++i)
+ {
+ _graph_to_runtime_graph.emplace(_module->graph(i), _runtime_module->addGraph(_memory_manager));
+ }
+ for (size_t i = 0; i < _module->size(); ++i)
+ {
+ const loco::Graph *graph = _module->graph(i);
+ RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph);
+ GraphLoader loader(graph, runtime_graph, _runtime_to_ir, _graph_to_runtime_graph,
+ _node_to_tensor, _memory_manager);
+ loader.loadTensors();
+ loader.initInputOutputTensors();
+ loader.loadOperators();
+ }
+}
+
+} // namespace luci_interpreter
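
The two separate loops in load() are deliberate: an operator builder may request the RuntimeGraph of a subgraph that has not been loaded yet, so all RuntimeGraph objects must exist before any operator is built. The If builder later in this patch is the concrete consumer of that guarantee:

    RuntimeGraph *then_graph = helper.getRuntimeGraph(node->then_graph());
    RuntimeGraph *else_graph = helper.getRuntimeGraph(node->else_graph());
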
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.h b/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.h
new file mode 100644
index 000000000..11326a2ee
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/ModuleLoader.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_MODULELOADER_H
+#define LUCI_INTERPRETER_LOADER_MODULELOADER_H
+
+#include "core/RuntimeModule.h"
+#include "loader/RuntimeToIR.h"
+#include "luci_interpreter/MemoryManager.h"
+
+#include <luci/IR/Module.h>
+
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class ModuleLoader
+{
+public:
+ ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module,
+ RuntimeToIR &runtime_to_ir,
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor,
+ IMemoryManager *memory_manager);
+
+ void load();
+
+private:
+ IMemoryManager *_memory_manager;
+ const luci::Module *_module;
+ RuntimeModule *_runtime_module;
+ RuntimeToIR &_runtime_to_ir;
+ std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
+ std::unordered_map<const loco::Graph *, RuntimeGraph *> _graph_to_runtime_graph;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_MODULELOADER_H
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/RuntimeToIR.h b/compiler/luci-micro/luci-interpreter/src/loader/RuntimeToIR.h
new file mode 100644
index 000000000..9ea8b1fa2
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/RuntimeToIR.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_RUNTIMETOIR_H
+#define LUCI_INTERPRETER_LOADER_RUNTIMETOIR_H
+
+#include "luci_interpreter/core/Tensor.h"
+
+#include <luci/IR/CircleNode.h>
+
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+// Maps runtime entities back to IR entities. Used to implement the observing functionality.
+struct RuntimeToIR
+{
+ std::unordered_map<const Tensor *, const luci::CircleNode *> tensor_to_node;
+ std::unordered_map<const Kernel *, const luci::CircleNode *> kernel_to_node;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_RUNTIMETOIR_H
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Add.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Add.cpp
new file mode 100644
index 000000000..501e84752
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Add.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Add.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleAdd(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleAdd *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ AddParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Add>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ArgMax.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ArgMax.cpp
new file mode 100644
index 000000000..f3ca55744
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ArgMax.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ArgMax.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleArgMax(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleArgMax *>(circle_node);
+ assert(node->arity() == 2);
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *axis = helper.getInputTensor(node->dimension());
+ Tensor *output = helper.getOutputTensor(node);
+
+ ArgMaxParams params{};
+ params.output_type = node->output_type();
+
+ return std::make_unique<kernels::ArgMax>(input, axis, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/AveragePool2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/AveragePool2D.cpp
new file mode 100644
index 000000000..a8135706f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/AveragePool2D.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/AveragePool2D.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleAveragePool2D(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleAveragePool2D *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->value());
+ Tensor *output = helper.getOutputTensor(node);
+
+ Pool2DParams params{};
+ params.padding = node->padding();
+ params.filter_height = node->filter()->h();
+ params.filter_width = node->filter()->w();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.activation = node->fusedActivationFunction();
+
+ // It is unknown what data will be stored in the scratchpad tensor,
+ // so use UINT8 as the most general option.
+ auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
+ scratchpad->set_observable(false);
+ scratchpad->set_data_buffer(nullptr);
+ // If the node has an execution plan, read the memory offset for the scratchpad
+ // temporary tensor from the beginning of the shared memory buffer.
+ // Used by the Static Memory Manager.
+ // TODO move tensors offset initialization to one place
+ if (luci::has_execution_plan(node))
+ {
+ const auto execution_plan = luci::get_execution_plan(node);
+ // Check whether the offset for the current CircleAveragePool2D temporary was found.
+ if (execution_plan.offsets().size() > 1)
+ {
+ // If so, keep this offset in the scratchpad tensor.
+ scratchpad->set_offset(execution_plan.offsets().at(1));
+ }
+ }
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
+
+ return std::make_unique<kernels::AveragePool2D>(input, output, tmp, params);
+}
+
+} // namespace luci_interpreter
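
This scratchpad-plus-offset pattern recurs in the Conv2D, DepthwiseConv2D and BatchMatMul builders below. The offset indexing is only implicit in the patch; reading the builders together suggests (an inference, not a documented contract) that offsets().at(0) addresses the node's own output while entries 1..N address its temporaries: BatchMatMul, which needs two scratchpads, asserts offsets().size() == 3 and consumes at(1) and at(2).

    const auto execution_plan = luci::get_execution_plan(node);
    const auto &offsets = execution_plan.offsets();
    // offsets.at(0): the node's own output tensor (inferred, not stated in this patch)
    // offsets.at(1), offsets.at(2), ...: scratchpad temporaries, in builder order
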
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchMatMul.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchMatMul.cpp
new file mode 100644
index 000000000..9da2f6d93
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchMatMul.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/BatchMatMul.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleBatchMatMul(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleBatchMatMul *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *lhs = helper.getInputTensor(node->x());
+ const Tensor *rhs = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ auto lhs_scratchpad =
+ std::make_unique<Tensor>(lhs->element_type(), Shape({}), AffineQuantization{}, "");
+ lhs_scratchpad->set_observable(false);
+ lhs_scratchpad->set_data_buffer(nullptr);
+ auto rhs_scratchpad =
+ std::make_unique<Tensor>(rhs->element_type(), Shape({}), AffineQuantization{}, "");
+ rhs_scratchpad->set_observable(false);
+ rhs_scratchpad->set_data_buffer(nullptr);
+ // If the node has an execution plan, read the memory offsets for the scratchpad
+ // temporary tensors from the beginning of the shared memory buffer.
+ // Used by the Static Memory Manager.
+ // TODO move tensors offset initialization to one place
+ if (luci::has_execution_plan(node))
+ {
+ const auto execution_plan = luci::get_execution_plan(node);
+ // Check whether the offset for the current BatchMatMul temporary was found.
+ if (execution_plan.offsets().size() > 1)
+ {
+ assert(execution_plan.offsets().size() == 3);
+
+ // If so, keep these offsets in the scratchpad tensors.
+ lhs_scratchpad->set_offset(execution_plan.offsets().at(1));
+ rhs_scratchpad->set_offset(execution_plan.offsets().at(2));
+ }
+ }
+ Tensor *lhs_tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(lhs_scratchpad));
+ Tensor *rhs_tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(rhs_scratchpad));
+
+ BatchMatMulParams params;
+ params.adj_x = node->adj_x();
+ params.adj_y = node->adj_y();
+
+ return std::make_unique<kernels::BatchMatMul>(lhs, rhs, output, lhs_tmp, rhs_tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp
new file mode 100644
index 000000000..ac6ebb30f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/BatchToSpaceND.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/BatchToSpaceND.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleBatchToSpaceND(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleBatchToSpaceND *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *block_shape = helper.getInputTensor(node->block_shape());
+ const Tensor *crops = helper.getInputTensor(node->crops());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::BatchToSpaceND>(input, block_shape, crops, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Builders.h b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Builders.h
new file mode 100644
index 000000000..eab284008
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Builders.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H
+#define LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H
+
+#include "loader/KernelBuilderHelper.h"
+
+#include "luci/IR/CircleNodes.h"
+
+namespace luci_interpreter
+{
+
+#define REGISTER_KERNEL(name) \
+ std::unique_ptr<Kernel> build_kernel_Circle##name(const luci::CircleNode *circle_node, \
+ KernelBuilderHelper &helper);
+
+#include "KernelsToBuild.lst"
+
+#undef REGISTER_KERNEL
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_NODES_BUILDERS_H
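
Builders.h uses the X-macro idiom: KernelsToBuild.lst holds one REGISTER_KERNEL(<name>) entry per kernel selected for the current PAL, and including it with the definition above turns each entry into a forward declaration. For example, an entry REGISTER_KERNEL(Add) expands to

    std::unique_ptr<Kernel> build_kernel_CircleAdd(const luci::CircleNode *circle_node,
                                                   KernelBuilderHelper &helper);

matching the definition in Add.cpp above; the kernel builder's dispatch code can re-include the same list with a different REGISTER_KERNEL definition to populate its name-to-builder table.
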
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Cast.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Cast.cpp
new file mode 100644
index 000000000..a16354c96
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Cast.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Cast.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleCast(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleCast *>(circle_node);
+
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Cast>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Concatenation.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Concatenation.cpp
new file mode 100644
index 000000000..ba2564ea2
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Concatenation.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Concatenation.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleConcatenation(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleConcatenation *>(circle_node);
+ std::vector<const Tensor *> inputs(node->numValues());
+ for (uint32_t i = 0; i < node->numValues(); ++i)
+ {
+ inputs[i] = helper.getInputTensor(node->values(i));
+ }
+ Tensor *output = helper.getOutputTensor(node);
+
+ ConcatenationParams params{};
+ params.axis = node->axis();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Concatenation>(std::move(inputs), output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Conv2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Conv2D.cpp
new file mode 100644
index 000000000..218165e20
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Conv2D.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Conv2D.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleConv2D(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleConv2D *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *filter = helper.getInputTensor(node->filter());
+ const Tensor *bias = helper.getOptionalInputTensor(node->bias());
+ Tensor *output = helper.getOutputTensor(node);
+
+ // It is unknown what data will be stored in the scratchpad tensor,
+ // so use UINT8 as the most general option.
+ auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
+ scratchpad->set_observable(false);
+ scratchpad->set_data_buffer(nullptr);
+ // If the node has an execution plan, read the memory offset for the scratchpad
+ // temporary tensor from the beginning of the shared memory buffer.
+ // Used by the Static Memory Manager.
+ // TODO move tensors offset initialization to one place
+ if (luci::has_execution_plan(node))
+ {
+ const auto execution_plan = luci::get_execution_plan(node);
+ // Check whether the offset for the current CircleConv2D temporary was found.
+ if (execution_plan.offsets().size() > 1)
+ {
+ // If so, keep this offset in the scratchpad tensor.
+ scratchpad->set_offset(execution_plan.offsets().at(1));
+ }
+ }
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
+
+ Conv2DParams params{};
+ params.padding = node->padding();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.dilation_height_factor = node->dilation()->h();
+ params.dilation_width_factor = node->dilation()->w();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Conv2D>(input, filter, bias, output, tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthToSpace.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthToSpace.cpp
new file mode 100644
index 000000000..174946367
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthToSpace.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/DepthToSpace.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDepthToSpace(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleDepthToSpace *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ Tensor *output = helper.getOutputTensor(node);
+
+ DepthToSpaceParams params{};
+ params.block_size = node->block_size();
+
+ return std::make_unique<kernels::DepthToSpace>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
new file mode 100644
index 000000000..8af1e3b58
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/DepthwiseConv2D.h"
+#include <luci/Plan/CircleNodeExecutionPlan.h>
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDepthwiseConv2D(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleDepthwiseConv2D *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *filter = helper.getInputTensor(node->filter());
+ const Tensor *bias = helper.getInputTensor(node->bias());
+ Tensor *output = helper.getOutputTensor(node);
+
+ DepthwiseConv2DParams params{};
+ params.padding = node->padding();
+ params.depth_multiplier = node->depthMultiplier();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.dilation_height_factor = node->dilation()->h();
+ params.dilation_width_factor = node->dilation()->w();
+ params.activation = node->fusedActivationFunction();
+
+ // It is unknown what data will be stored in the scratchpad tensor,
+ // so use UINT8 as the most general option.
+ auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
+ scratchpad->set_observable(false);
+ scratchpad->set_data_buffer(nullptr);
+ // If the node has an execution plan, read the memory offset for the scratchpad
+ // temporary tensor from the beginning of the shared memory buffer.
+ // Used by the Static Memory Manager.
+ // TODO move tensors offset initialization to one place
+ if (luci::has_execution_plan(node))
+ {
+ const auto execution_plan = luci::get_execution_plan(node);
+ // Check whether the offset for the current CircleDepthwiseConv2D temporary was found.
+ if (execution_plan.offsets().size() > 1)
+ {
+ // If so, keep this offset in the scratchpad tensor.
+ scratchpad->set_offset(execution_plan.offsets().at(1));
+ }
+ }
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad));
+
+ return std::make_unique<kernels::DepthwiseConv2D>(input, filter, bias, output, tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Dequantize.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Dequantize.cpp
new file mode 100644
index 000000000..787322e9b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Dequantize.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Dequantize.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDequantize(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleDequantize *>(circle_node);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Dequantize>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Div.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Div.cpp
new file mode 100644
index 000000000..0611dfdab
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Div.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Div.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleDiv(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleDiv *>(circle_node);
+ assert(node->arity() == 2);
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ DivParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Div>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Elu.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Elu.cpp
new file mode 100644
index 000000000..a79985e3b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Elu.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Elu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleElu(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleElu *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->features());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Elu>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Equal.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Equal.cpp
new file mode 100644
index 000000000..59692883f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Equal.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Equal.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleEqual(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleEqual *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Equal>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Exp.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Exp.cpp
new file mode 100644
index 000000000..30d11cb89
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Exp.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Exp.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleExp(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleExp *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Exp>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ExpandDims.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ExpandDims.cpp
new file mode 100644
index 000000000..9840c34e5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ExpandDims.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ExpandDims.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleExpandDims(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleExpandDims *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *axis = helper.getInputTensor(node->axis());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::ExpandDims>(input, axis, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Fill.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Fill.cpp
new file mode 100644
index 000000000..3aefdf1c5
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Fill.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Fill.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFill(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleFill *>(circle_node);
+ assert(node->arity() == 2);
+
+ const auto dims = helper.getInputTensor(node->dims());
+ const auto value = helper.getInputTensor(node->value());
+ auto output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Fill>(dims, value, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Floor.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Floor.cpp
new file mode 100644
index 000000000..e0a223116
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Floor.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Floor.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFloor(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleFloor *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Floor>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/FloorDiv.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/FloorDiv.cpp
new file mode 100644
index 000000000..a45d89e38
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/FloorDiv.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/FloorDiv.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFloorDiv(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleFloorDiv *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::FloorDiv>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/FullyConnected.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/FullyConnected.cpp
new file mode 100644
index 000000000..b7b742b8a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/FullyConnected.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/FullyConnected.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleFullyConnected(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleFullyConnected *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *weights = helper.getInputTensor(node->weights());
+ const Tensor *bias = helper.getOptionalInputTensor(node->bias());
+ Tensor *output = helper.getOutputTensor(node);
+
+ FullyConnectedParams params{};
+ params.activation = node->fusedActivationFunction();
+ params.keep_num_dims = node->keep_num_dims();
+
+ return std::make_unique<kernels::FullyConnected>(input, weights, bias, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Gather.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Gather.cpp
new file mode 100644
index 000000000..2ee2906e0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Gather.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Gather.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleGather(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleGather *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *params = helper.getInputTensor(node->params());
+ const Tensor *indices = helper.getInputTensor(node->indices());
+ Tensor *output = helper.getOutputTensor(node);
+
+ GatherParams gparams{};
+ gparams.axis = node->axis();
+ // TODO support batch_dims
+ gparams.batch_dims = 0;
+
+ return std::make_unique<kernels::Gather>(params, indices, output, gparams);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Greater.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Greater.cpp
new file mode 100644
index 000000000..80aa63cf0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Greater.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Greater.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleGreater(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleGreater *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Greater>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/GreaterEqual.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/GreaterEqual.cpp
new file mode 100644
index 000000000..272f2843b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/GreaterEqual.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/GreaterEqual.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleGreaterEqual(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleGreaterEqual *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::GreaterEqual>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/If.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/If.cpp
new file mode 100644
index 000000000..3ac7d4941
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/If.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/If.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleIf(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleIf *>(circle_node);
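+  // Each output of CircleIf is represented by a virtual CircleIfOut node,
+  // so collect those nodes first to wire up the kernel's output tensors.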
+ auto output_nodes = collectOutputNodes<luci::CircleIfOut>(node);
+ assert(node->arity() == 1 + node->input_count());
+ assert(output_nodes.size() == static_cast<size_t>(node->output_count()));
+
+ const Tensor *cond = helper.getInputTensor(node->cond());
+ std::vector<const Tensor *> inputs(node->input_count());
+ for (uint32_t i = 0; i < node->input_count(); ++i)
+ {
+ inputs[i] = helper.getInputTensor(node->input(i));
+ }
+ std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+ RuntimeGraph *then_graph = helper.getRuntimeGraph(node->then_graph());
+ RuntimeGraph *else_graph = helper.getRuntimeGraph(node->else_graph());
+
+ return std::make_unique<kernels::If>(cond, std::move(inputs), std::move(outputs), then_graph,
+ else_graph);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/InstanceNorm.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/InstanceNorm.cpp
new file mode 100644
index 000000000..06031e5bc
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/InstanceNorm.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/InstanceNorm.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleInstanceNorm(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleInstanceNorm *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *gamma = helper.getInputTensor(node->gamma());
+ const Tensor *beta = helper.getInputTensor(node->beta());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ InstanceNormParams params{};
+ params.epsilon = node->epsilon();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::InstanceNorm>(input, gamma, beta, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Normalize.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Normalize.cpp
new file mode 100644
index 000000000..6e22e6d4e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Normalize.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/L2Normalize.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleL2Normalize(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleL2Normalize *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ L2NormParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::L2Normalize>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Pool2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Pool2D.cpp
new file mode 100644
index 000000000..95b55896f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/L2Pool2D.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/L2Pool2D.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleL2Pool2D(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleL2Pool2D *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->value());
+ Tensor *output = helper.getOutputTensor(node);
+
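+  // The pooling window and strides come from the node's 2D 'filter' and
+  // 'stride' attributes (height x width).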
+ Pool2DParams params{};
+ params.padding = node->padding();
+ params.filter_height = node->filter()->h();
+ params.filter_width = node->filter()->w();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::L2Pool2D>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LeakyRelu.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LeakyRelu.cpp
new file mode 100644
index 000000000..bbf5067b1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LeakyRelu.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LeakyRelu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLeakyRelu(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLeakyRelu *>(circle_node);
+ assert(node->arity() == 1);
+ const Tensor *input = helper.getInputTensor(node->features());
+ Tensor *output = helper.getOutputTensor(node);
+
+ LeakyReluParams params{};
+ params.alpha = node->alpha();
+
+ return std::make_unique<kernels::LeakyRelu>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Less.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Less.cpp
new file mode 100644
index 000000000..ae914ecc9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Less.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Less.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLess(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLess *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Less>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LessEqual.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LessEqual.cpp
new file mode 100644
index 000000000..f1b424b55
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LessEqual.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LessEqual.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLessEqual(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLessEqual *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::LessEqual>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp
new file mode 100644
index 000000000..962ca2d7c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LocalResponseNormalization.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LocalResponseNormalization.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel>
+build_kernel_CircleLocalResponseNormalization(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLocalResponseNormalization *>(circle_node);
+ assert(node->arity() == 1);
+ const Tensor *input = helper.getInputTensor(node->input());
+ Tensor *output = helper.getOutputTensor(node);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = node->radius();
+ params.bias = node->bias();
+ params.alpha = node->alpha();
+ params.beta = node->beta();
+
+ return std::make_unique<kernels::LocalResponseNormalization>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogSoftmax.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogSoftmax.cpp
new file mode 100644
index 000000000..432204115
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogSoftmax.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogSoftmax.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogSoftmax(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLogSoftmax *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->logits());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::LogSoftmax>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalAnd.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalAnd.cpp
new file mode 100644
index 000000000..bf3cb671a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalAnd.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogicalAnd.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogicalAnd(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLogicalAnd *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::LogicalAnd>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalNot.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalNot.cpp
new file mode 100644
index 000000000..fefcd9a06
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalNot.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogicalNot.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogicalNot(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLogicalNot *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::LogicalNot>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalOr.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalOr.cpp
new file mode 100644
index 000000000..a416cb401
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/LogicalOr.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/LogicalOr.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogicalOr(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLogicalOr *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::LogicalOr>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Logistic.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Logistic.cpp
new file mode 100644
index 000000000..4a69deef1
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Logistic.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Logistic.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleLogistic(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleLogistic *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Logistic>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/MaxPool2D.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/MaxPool2D.cpp
new file mode 100644
index 000000000..f66a206ca
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/MaxPool2D.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/MaxPool2D.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMaxPool2D(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleMaxPool2D *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->value());
+ Tensor *output = helper.getOutputTensor(node);
+
+ Pool2DParams params{};
+ params.padding = node->padding();
+ params.filter_height = node->filter()->h();
+ params.filter_width = node->filter()->w();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::MaxPool2D>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Maximum.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Maximum.cpp
new file mode 100644
index 000000000..d0bff776a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Maximum.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Maximum.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMaximum(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleMaximum *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Maximum>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mean.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mean.cpp
new file mode 100644
index 000000000..0dec63e79
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mean.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Mean.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMean(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleMean *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *axes = helper.getInputTensor(node->reduction_indices());
+ Tensor *output = helper.getOutputTensor(node);
+
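+  // kernels::Mean needs three scratch tensors: an index buffer, the resolved
+  // reduction axes and an accumulation buffer. They are registered with the
+  // runtime graph so they take part in normal tensor memory management, but
+  // marked non-observable because they are internal to the kernel.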
+ auto temp_index_unique =
+ std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
+ temp_index_unique->set_observable(false);
+ temp_index_unique->set_data_buffer(nullptr);
+ Tensor *temp_index =
+ helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_index_unique));
+
+ auto resolved_axes_unique =
+ std::make_unique<Tensor>(DataType::S32, Shape({}), AffineQuantization{}, "");
+ resolved_axes_unique->set_observable(false);
+ resolved_axes_unique->set_data_buffer(nullptr);
+ Tensor *resolved_axes =
+ helper.getRuntimeGraph(node->graph())->addTensor(std::move(resolved_axes_unique));
+
+ auto temp_sum_unique =
+ std::make_unique<Tensor>(input->element_type(), Shape({}), AffineQuantization{}, "");
+ temp_sum_unique->set_observable(false);
+ temp_sum_unique->set_data_buffer(nullptr);
+ Tensor *temp_sum = helper.getRuntimeGraph(node->graph())->addTensor(std::move(temp_sum_unique));
+
+ ReducerParams params{};
+ params.keep_dims = node->keep_dims();
+
+ return std::make_unique<kernels::Mean>(input, axes, output, temp_index, resolved_axes, temp_sum,
+ params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Minimum.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Minimum.cpp
new file mode 100644
index 000000000..1a49c1090
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Minimum.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Minimum.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMinimum(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleMinimum *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Minimum>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/MirrorPad.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/MirrorPad.cpp
new file mode 100644
index 000000000..b221b4574
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/MirrorPad.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/MirrorPad.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMirrorPad(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleMirrorPad *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *paddings = helper.getInputTensor(node->paddings());
+ Tensor *output = helper.getOutputTensor(node);
+
+ MirrorPadParams params{};
+ params.mode = node->mode();
+
+ return std::make_unique<kernels::MirrorPad>(input, paddings, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mul.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mul.cpp
new file mode 100644
index 000000000..f9984853a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Mul.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Mul.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleMul(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleMul *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ MulParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Mul>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Neg.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Neg.cpp
new file mode 100644
index 000000000..9a9ecf991
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Neg.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Neg.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleNeg(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleNeg *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Neg>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/NotEqual.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/NotEqual.cpp
new file mode 100644
index 000000000..3916a5854
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/NotEqual.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/NotEqual.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleNotEqual(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleNotEqual *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *x = helper.getInputTensor(node->x());
+ const Tensor *y = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::NotEqual>(x, y, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/OneHot.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/OneHot.cpp
new file mode 100644
index 000000000..a40160945
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/OneHot.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/OneHot.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleOneHot(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleOneHot *>(circle_node);
+ assert(node->arity() == 4);
+
+ const Tensor *indices = helper.getInputTensor(node->indices());
+ const Tensor *depth = helper.getInputTensor(node->depth());
+ const Tensor *on_value = helper.getInputTensor(node->on_value());
+ const Tensor *off_value = helper.getInputTensor(node->off_value());
+ Tensor *output = helper.getOutputTensor(node);
+
+ OneHotParams params{};
+ params.axis = node->axis();
+
+ return std::make_unique<kernels::OneHot>(indices, depth, on_value, off_value, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/PRelu.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/PRelu.cpp
new file mode 100644
index 000000000..f3d700c95
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/PRelu.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/PRelu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePRelu(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CirclePRelu *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *alpha = helper.getInputTensor(node->alpha());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::PRelu>(input, alpha, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pack.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pack.cpp
new file mode 100644
index 000000000..efc5850e0
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pack.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pack.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePack(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CirclePack *>(circle_node);
+ assert(node->arity() == node->values_count());
+
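+  // Pack is variadic: collect one input tensor per packed value.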
+ std::vector<const Tensor *> inputs(node->values_count());
+ for (uint32_t i = 0; i < node->values_count(); ++i)
+ {
+ inputs[i] = helper.getInputTensor(node->values(i));
+ }
+ Tensor *output = helper.getOutputTensor(node);
+
+ PackParams params{};
+ params.axis = node->axis();
+ params.values_count = node->values_count();
+
+ return std::make_unique<kernels::Pack>(std::move(inputs), output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pad.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pad.cpp
new file mode 100644
index 000000000..67ce997a7
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pad.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pad.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePad(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CirclePad *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *paddings = helper.getInputTensor(node->paddings());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Pad>(input, paddings, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/PadV2.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/PadV2.cpp
new file mode 100644
index 000000000..e378a972a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/PadV2.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/PadV2.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePadV2(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CirclePadV2 *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *paddings = helper.getInputTensor(node->paddings());
+ const Tensor *constant_values = helper.getInputTensor(node->constant_values());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::PadV2>(input, paddings, constant_values, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pow.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pow.cpp
new file mode 100644
index 000000000..d32fc3dbb
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Pow.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Pow.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CirclePow(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CirclePow *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Pow>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Quantize.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Quantize.cpp
new file mode 100644
index 000000000..cb36fb6da
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Quantize.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Quantize.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleQuantize(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleQuantize *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ Tensor *output = helper.getOutputTensor(node);
+
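+  // No params struct is needed here: the kernel is expected to take scale
+  // and zero point from the tensors' own quantization metadata.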
+ return std::make_unique<kernels::Quantize>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu.cpp
new file mode 100644
index 000000000..1d64c1c4e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Relu.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleRelu(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleRelu *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->features());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Relu>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu6.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu6.cpp
new file mode 100644
index 000000000..e50cd2545
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Relu6.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Relu6.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleRelu6(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleRelu6 *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->features());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Relu6>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Reshape.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Reshape.cpp
new file mode 100644
index 000000000..76ddd88a3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Reshape.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Reshape.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleReshape(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleReshape *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->tensor());
+ const Tensor *shape = helper.getInputTensor(node->shape());
+ Tensor *output = helper.getOutputTensor(node);
+
+  // NOTE The 'newShape' attribute is ignored; the 'shape' input tensor is used instead.
+ return std::make_unique<kernels::Reshape>(input, shape, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp
new file mode 100644
index 000000000..dc2b88ad3
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeBilinear.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ResizeBilinear.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleResizeBilinear(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleResizeBilinear *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *size = helper.getInputTensor(node->size());
+ Tensor *output = helper.getOutputTensor(node);
+
+ ResizeBilinearParams params{};
+ params.align_corners = node->align_corners();
+ params.half_pixel_centers = node->half_pixel_centers();
+
+ return std::make_unique<kernels::ResizeBilinear>(input, size, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp
new file mode 100644
index 000000000..c7058ae78
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ResizeNearestNeighbor.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ResizeNearestNeighbor.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel>
+build_kernel_CircleResizeNearestNeighbor(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleResizeNearestNeighbor *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *size = helper.getInputTensor(node->size());
+ Tensor *output = helper.getOutputTensor(node);
+
+ ResizeNearestNeighborParams params{};
+ params.align_corners = node->align_corners();
+  // TODO Update half_pixel_centers once CircleResizeNearestNeighbor gains
+  // that attribute. The current node definition does not carry
+  // half_pixel_centers, so the parameter is fixed to false here and must be
+  // revisited when the node is updated.
+ params.half_pixel_centers = false;
+
+ return std::make_unique<kernels::ResizeNearestNeighbor>(input, size, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/ReverseV2.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ReverseV2.cpp
new file mode 100644
index 000000000..c1a7f5350
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/ReverseV2.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/ReverseV2.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleReverseV2(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleReverseV2 *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->tensor());
+ const Tensor *axes = helper.getInputTensor(node->axis());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::ReverseV2>(input, axes, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Rsqrt.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Rsqrt.cpp
new file mode 100644
index 000000000..0714a5dba
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Rsqrt.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Rsqrt.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleRsqrt(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleRsqrt *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Rsqrt>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SVDF.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SVDF.cpp
new file mode 100644
index 000000000..d172ef438
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SVDF.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SVDF.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSVDF(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSVDF *>(circle_node);
+ assert(node->arity() == 5);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *feature = helper.getInputTensor(node->weight_feature());
+ const Tensor *time = helper.getInputTensor(node->weight_time());
+ const Tensor *bias = helper.getOptionalInputTensor(node->bias());
+ const Tensor *input_activation_state = helper.getInputTensor(node->input_activation_state());
+ Tensor *output = helper.getOutputTensor(node);
+
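+  // SVDF needs a set of scratchpad tensors. Each one is created here with a
+  // placeholder shape; the kernel is expected to resize them when configured.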
+ auto scratchpad_tensor = std::make_unique<Tensor>(input_activation_state->element_type(),
+ Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
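+  // Quantized (S8) inputs accumulate into 32-bit integer scratch space;
+  // all other inputs use float32.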
+ DataType data_type = input->element_type() == DataType::S8 ? DataType::S32 : DataType::FLOAT32;
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_1 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
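+  // Hybrid case: float input with quantized weights. The corresponding
+  // scratchpad then takes the weights' element type.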
+ if (data_type == DataType::FLOAT32 &&
+ (feature->element_type() == DataType::S8 || feature->element_type() == DataType::U8))
+ {
+ data_type = feature->element_type();
+ }
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_2 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ data_type = DataType::FLOAT32;
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_3 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_4 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_5 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ scratchpad_tensor = std::make_unique<Tensor>(data_type, Shape({}), AffineQuantization{}, "");
+ scratchpad_tensor->set_observable(false);
+ scratchpad_tensor->set_data_buffer(nullptr);
+ Tensor *tmp_6 = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratchpad_tensor));
+
+ SVDFParams params{};
+ params.activation = node->fusedActivationFunction();
+ params.svdf_rank = node->svdf_rank();
+ params.asymmetric_quantize_inputs = node->asymmetric_quantize_inputs();
+
+ return std::make_unique<kernels::SVDF>(input, feature, time, bias, input_activation_state, output,
+ tmp, tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Shape.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Shape.cpp
new file mode 100644
index 000000000..d1edbc794
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Shape.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Shape.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleShape(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleShape *>(circle_node);
+ assert(node->arity() == 1);
+
+  const Tensor *input = helper.getInputTensor(node->input());
+  Tensor *output = helper.getOutputTensor(node);
+
+ ShapeParams shape_params{};
+ shape_params.out_type = node->out_type();
+
+ return std::make_unique<kernels::ShapeKernel>(input, output, shape_params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Slice.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Slice.cpp
new file mode 100644
index 000000000..60ac6417c
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Slice.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Slice.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSlice(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSlice *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *begin = helper.getInputTensor(node->begin());
+ const Tensor *size = helper.getInputTensor(node->size());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Slice>(input, begin, size, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Softmax.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Softmax.cpp
new file mode 100644
index 000000000..f41f63f6f
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Softmax.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Softmax.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSoftmax(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSoftmax *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->logits());
+ Tensor *output = helper.getOutputTensor(node);
+
+ SoftmaxParams params{};
+ params.beta = node->beta();
+
+ return std::make_unique<kernels::Softmax>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp
new file mode 100644
index 000000000..b6e6cf516
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToBatchND.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SpaceToBatchND.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSpaceToBatchND(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSpaceToBatchND *>(circle_node);
+ assert(node->arity() == 3);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *block_shape = helper.getInputTensor(node->block_shape());
+ const Tensor *paddings = helper.getInputTensor(node->paddings());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::SpaceToBatchND>(input, block_shape, paddings, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp
new file mode 100644
index 000000000..63fdb95ec
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SpaceToDepth.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SpaceToDepth.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSpaceToDepth(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSpaceToDepth *>(circle_node);
+ assert(node->arity() == 1);
+ const Tensor *input = helper.getInputTensor(node->input());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ SpaceToDepthParams params{};
+ params.block_size = node->block_size();
+
+ return std::make_unique<kernels::SpaceToDepth>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Split.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Split.cpp
new file mode 100644
index 000000000..3f6d4a7df
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Split.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Split.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSplit(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSplit *>(circle_node);
+ auto output_nodes = collectOutputNodes<luci::CircleSplitOut>(node);
+ assert(node->arity() == 2);
+ assert(output_nodes.size() == static_cast<size_t>(node->num_split()));
+
+ const Tensor *axis = helper.getInputTensor(node->split_dim());
+ const Tensor *input = helper.getInputTensor(node->input());
+ std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+ // NOTE 'num_splits' attribute is ignored.
+ return std::make_unique<kernels::Split>(axis, input, std::move(outputs));
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SplitV.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SplitV.cpp
new file mode 100644
index 000000000..0788822ca
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SplitV.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SplitV.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSplitV(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSplitV *>(circle_node);
+ auto output_nodes = collectOutputNodes<luci::CircleSplitVOut>(node);
+ assert(node->arity() == 3);
+ assert(output_nodes.size() == static_cast<size_t>(node->num_split()));
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *sizes_data = helper.getInputTensor(node->size_splits());
+ const Tensor *axis = helper.getInputTensor(node->split_dim());
+ std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+ // NOTE 'num_splits' attribute is ignored.
+ return std::make_unique<kernels::SplitV>(input, sizes_data, axis, std::move(outputs));
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sqrt.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sqrt.cpp
new file mode 100644
index 000000000..b9843fe0b
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sqrt.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Sqrt.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSqrt(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSqrt *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Sqrt>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Square.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Square.cpp
new file mode 100644
index 000000000..0ad7c1772
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Square.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Square.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSquare(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSquare *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Square>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/SquaredDifference.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SquaredDifference.cpp
new file mode 100644
index 000000000..e4c6fd851
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/SquaredDifference.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/SquaredDifference.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSquaredDifference(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSquaredDifference *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::SquaredDifference>(input1, input2, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Squeeze.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Squeeze.cpp
new file mode 100644
index 000000000..6885f8077
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Squeeze.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Squeeze.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSqueeze(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSqueeze *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ Tensor *output = helper.getOutputTensor(node);
+
+ SqueezeParams params{};
+ params.squeeze_dims = node->squeeze_dims();
+
+ return std::make_unique<kernels::Squeeze>(input, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/StridedSlice.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/StridedSlice.cpp
new file mode 100644
index 000000000..359b4e3e9
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/StridedSlice.cpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/StridedSlice.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleStridedSlice(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleStridedSlice *>(circle_node);
+ assert(node->arity() == 4);
+
+ const Tensor *input = helper.getInputTensor(node->input());
+ const Tensor *begin = helper.getInputTensor(node->begin());
+ const Tensor *end = helper.getInputTensor(node->end());
+ const Tensor *strides = helper.getInputTensor(node->strides());
+
+ Tensor *output = helper.getOutputTensor(node);
+
+ StridedSliceParams params{};
+ params.begin_mask = node->begin_mask();
+ params.ellipsis_mask = node->ellipsis_mask();
+ params.end_mask = node->end_mask();
+ params.new_axis_mask = node->new_axis_mask();
+ params.shrink_axis_mask = node->shrink_axis_mask();
+
+ return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sub.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sub.cpp
new file mode 100644
index 000000000..a6252cb53
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Sub.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Sub.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleSub(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleSub *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = helper.getInputTensor(node->x());
+ const Tensor *input2 = helper.getInputTensor(node->y());
+ Tensor *output = helper.getOutputTensor(node);
+
+ SubParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Sub>(input1, input2, output, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Tanh.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Tanh.cpp
new file mode 100644
index 000000000..a58ef60a8
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Tanh.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Tanh.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleTanh(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleTanh *>(circle_node);
+ assert(node->arity() == 1);
+
+ const Tensor *input = helper.getInputTensor(node->x());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Tanh>(input, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Transpose.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Transpose.cpp
new file mode 100644
index 000000000..ea17d8311
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Transpose.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Transpose.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleTranspose(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleTranspose *>(circle_node);
+ assert(node->arity() == 2);
+
+ const Tensor *input = helper.getInputTensor(node->a());
+ const Tensor *perm = helper.getInputTensor(node->perm());
+ Tensor *output = helper.getOutputTensor(node);
+
+ return std::make_unique<kernels::Transpose>(input, perm, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/TransposeConv.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/TransposeConv.cpp
new file mode 100644
index 000000000..d773e301e
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/TransposeConv.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/TransposeConv.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleTransposeConv(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleTransposeConv *>(circle_node);
+ assert(node->arity() == 4);
+
+ const Tensor *input_sizes = helper.getInputTensor(node->inputSizes());
+ const Tensor *filter = helper.getInputTensor(node->filter());
+ const Tensor *out_backprop = helper.getInputTensor(node->outBackprop());
+ const Tensor *bias = helper.getOptionalInputTensor(node->bias());
+
+ Tensor *output = helper.getOutputTensor(node);
+
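+  // S16-quantized tensors accumulate into 64-bit scratch memory to avoid
+  // overflow; all other types use 32-bit accumulators.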
+ DataType scratch_data_type =
+ helper.getInputTensor(node)->element_type() == DataType::S16 ? DataType::S64 : DataType::S32;
+
+ auto scratch_tensor =
+ std::make_unique<Tensor>(scratch_data_type, Shape({}), AffineQuantization{}, "");
+ scratch_tensor->set_observable(false);
+ scratch_tensor->set_data_buffer(nullptr);
+ Tensor *tmp = helper.getRuntimeGraph(node->graph())->addTensor(std::move(scratch_tensor));
+
+ TransposeConvParams params{};
+ params.padding = node->padding();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+
+ return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, bias, output,
+ tmp, params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/Unpack.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Unpack.cpp
new file mode 100644
index 000000000..a1c0d323a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/Unpack.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/Unpack.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleUnpack(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleUnpack *>(circle_node);
+ auto output_nodes = collectOutputNodes<luci::CircleUnpackOut>(node);
+ assert(node->arity() == 1);
+ assert(output_nodes.size() == static_cast<size_t>(node->num()));
+
+ const Tensor *input = helper.getInputTensor(node->value());
+ std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
+ UnpackParams params{};
+ params.axis = node->axis();
+
+ // NOTE 'num' attribute is ignored.
+ return std::make_unique<kernels::Unpack>(input, std::move(outputs), params);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-micro/luci-interpreter/src/loader/nodes/While.cpp b/compiler/luci-micro/luci-interpreter/src/loader/nodes/While.cpp
new file mode 100644
index 000000000..8fde6ec8a
--- /dev/null
+++ b/compiler/luci-micro/luci-interpreter/src/loader/nodes/While.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Builders.h"
+
+#include "kernels/While.h"
+
+namespace luci_interpreter
+{
+
+std::unique_ptr<Kernel> build_kernel_CircleWhile(const luci::CircleNode *circle_node,
+ KernelBuilderHelper &helper)
+{
+ const auto *node = loco::must_cast<const luci::CircleWhile *>(circle_node);
+
+ auto output_nodes = collectOutputNodes<luci::CircleWhileOut>(node);
+ assert(node->arity() == node->input_count());
+ assert(output_nodes.size() == static_cast<size_t>(node->output_count()));
+
+ std::vector<const Tensor *> inputs(node->input_count());
+ for (uint32_t i = 0; i < node->input_count(); ++i)
+ {
+ inputs[i] = helper.getInputTensor(node->input(i));
+ }
+ std::vector<Tensor *> outputs = helper.getOutputTensors(output_nodes);
+
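+  // The condition and body subgraphs are separate runtime graphs; the While
+  // kernel evaluates cond_graph and, while it yields true, runs body_graph.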
+ RuntimeGraph *cond_graph = helper.getRuntimeGraph(node->cond_graph());
+ RuntimeGraph *body_graph = helper.getRuntimeGraph(node->body_graph());
+
+ return std::make_unique<kernels::While>(std::move(inputs), std::move(outputs), cond_graph,
+ body_graph);
+}
+
+} // namespace luci_interpreter