summaryrefslogtreecommitdiff
path: root/compiler/luci-interpreter
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/luci-interpreter')
-rw-r--r--compiler/luci-interpreter/CMakeLists.txt4
-rw-r--r--compiler/luci-interpreter/include/luci_interpreter/Interpreter.h78
-rw-r--r--compiler/luci-interpreter/include/luci_interpreter/core/DataType.h36
-rw-r--r--compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h133
-rw-r--r--compiler/luci-interpreter/requires.cmake1
-rw-r--r--compiler/luci-interpreter/src/CMakeLists.txt35
-rw-r--r--compiler/luci-interpreter/src/Interpreter.cpp126
-rw-r--r--compiler/luci-interpreter/src/core/CMakeLists.txt17
-rw-r--r--compiler/luci-interpreter/src/core/EventNotifier.h36
-rw-r--r--compiler/luci-interpreter/src/core/Kernel.h75
-rw-r--r--compiler/luci-interpreter/src/core/KernelParams.h151
-rw-r--r--compiler/luci-interpreter/src/core/RuntimeGraph.cpp93
-rw-r--r--compiler/luci-interpreter/src/core/RuntimeGraph.h60
-rw-r--r--compiler/luci-interpreter/src/core/RuntimeModule.h59
-rw-r--r--compiler/luci-interpreter/src/core/Tensor.cpp68
-rw-r--r--compiler/luci-interpreter/src/kernels/Add.cpp141
-rw-r--r--compiler/luci-interpreter/src/kernels/Add.h48
-rw-r--r--compiler/luci-interpreter/src/kernels/Add.test.cpp174
-rw-r--r--compiler/luci-interpreter/src/kernels/ArgMax.cpp140
-rw-r--r--compiler/luci-interpreter/src/kernels/ArgMax.h44
-rw-r--r--compiler/luci-interpreter/src/kernels/ArgMax.test.cpp98
-rw-r--r--compiler/luci-interpreter/src/kernels/AveragePool2D.cpp115
-rw-r--r--compiler/luci-interpreter/src/kernels/AveragePool2D.h51
-rw-r--r--compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp127
-rw-r--r--compiler/luci-interpreter/src/kernels/CMakeLists.txt106
-rw-r--r--compiler/luci-interpreter/src/kernels/Concatenation.cpp136
-rw-r--r--compiler/luci-interpreter/src/kernels/Concatenation.h48
-rw-r--r--compiler/luci-interpreter/src/kernels/Concatenation.test.cpp83
-rw-r--r--compiler/luci-interpreter/src/kernels/Conv2D.cpp194
-rw-r--r--compiler/luci-interpreter/src/kernels/Conv2D.h57
-rw-r--r--compiler/luci-interpreter/src/kernels/Conv2D.test.cpp185
-rw-r--r--compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp175
-rw-r--r--compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h54
-rw-r--r--compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp135
-rw-r--r--compiler/luci-interpreter/src/kernels/Elu.cpp52
-rw-r--r--compiler/luci-interpreter/src/kernels/Elu.h43
-rw-r--r--compiler/luci-interpreter/src/kernels/Elu.test.cpp64
-rw-r--r--compiler/luci-interpreter/src/kernels/FullyConnected.cpp79
-rw-r--r--compiler/luci-interpreter/src/kernels/FullyConnected.h49
-rw-r--r--compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp67
-rw-r--r--compiler/luci-interpreter/src/kernels/If.cpp89
-rw-r--r--compiler/luci-interpreter/src/kernels/If.h49
-rw-r--r--compiler/luci-interpreter/src/kernels/If.test.cpp111
-rw-r--r--compiler/luci-interpreter/src/kernels/L2Normalize.cpp74
-rw-r--r--compiler/luci-interpreter/src/kernels/L2Normalize.h46
-rw-r--r--compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp57
-rw-r--r--compiler/luci-interpreter/src/kernels/L2Pool2D.cpp88
-rw-r--r--compiler/luci-interpreter/src/kernels/L2Pool2D.h49
-rw-r--r--compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp228
-rw-r--r--compiler/luci-interpreter/src/kernels/LeakyRelu.cpp92
-rw-r--r--compiler/luci-interpreter/src/kernels/LeakyRelu.h52
-rw-r--r--compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp75
-rw-r--r--compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp65
-rw-r--r--compiler/luci-interpreter/src/kernels/LocalResponseNormalization.h44
-rw-r--r--compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp113
-rw-r--r--compiler/luci-interpreter/src/kernels/Logistic.cpp94
-rw-r--r--compiler/luci-interpreter/src/kernels/Logistic.h52
-rw-r--r--compiler/luci-interpreter/src/kernels/Logistic.test.cpp59
-rw-r--r--compiler/luci-interpreter/src/kernels/MaxPool2D.cpp120
-rw-r--r--compiler/luci-interpreter/src/kernels/MaxPool2D.h51
-rw-r--r--compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp97
-rw-r--r--compiler/luci-interpreter/src/kernels/Mean.cpp249
-rw-r--r--compiler/luci-interpreter/src/kernels/Mean.h55
-rw-r--r--compiler/luci-interpreter/src/kernels/Mean.test.cpp165
-rw-r--r--compiler/luci-interpreter/src/kernels/Mul.cpp82
-rw-r--r--compiler/luci-interpreter/src/kernels/Mul.h50
-rw-r--r--compiler/luci-interpreter/src/kernels/Mul.test.cpp85
-rw-r--r--compiler/luci-interpreter/src/kernels/Pad.cpp102
-rw-r--r--compiler/luci-interpreter/src/kernels/Pad.h43
-rw-r--r--compiler/luci-interpreter/src/kernels/Pad.test.cpp79
-rw-r--r--compiler/luci-interpreter/src/kernels/Reshape.cpp90
-rw-r--r--compiler/luci-interpreter/src/kernels/Reshape.h43
-rw-r--r--compiler/luci-interpreter/src/kernels/Reshape.test.cpp69
-rw-r--r--compiler/luci-interpreter/src/kernels/Softmax.cpp64
-rw-r--r--compiler/luci-interpreter/src/kernels/Softmax.h46
-rw-r--r--compiler/luci-interpreter/src/kernels/Softmax.test.cpp60
-rw-r--r--compiler/luci-interpreter/src/kernels/SpaceToDepth.cpp79
-rw-r--r--compiler/luci-interpreter/src/kernels/SpaceToDepth.h45
-rw-r--r--compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp60
-rw-r--r--compiler/luci-interpreter/src/kernels/Split.cpp81
-rw-r--r--compiler/luci-interpreter/src/kernels/Split.h47
-rw-r--r--compiler/luci-interpreter/src/kernels/Split.test.cpp126
-rw-r--r--compiler/luci-interpreter/src/kernels/Squeeze.cpp86
-rw-r--r--compiler/luci-interpreter/src/kernels/Squeeze.h44
-rw-r--r--compiler/luci-interpreter/src/kernels/Squeeze.test.cpp72
-rw-r--r--compiler/luci-interpreter/src/kernels/StridedSlice.cpp145
-rw-r--r--compiler/luci-interpreter/src/kernels/StridedSlice.h47
-rw-r--r--compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp113
-rw-r--r--compiler/luci-interpreter/src/kernels/TestUtils.cpp61
-rw-r--r--compiler/luci-interpreter/src/kernels/TestUtils.h183
-rw-r--r--compiler/luci-interpreter/src/kernels/Transpose.cpp84
-rw-r--r--compiler/luci-interpreter/src/kernels/Transpose.h44
-rw-r--r--compiler/luci-interpreter/src/kernels/Transpose.test.cpp117
-rw-r--r--compiler/luci-interpreter/src/kernels/TransposeConv.cpp153
-rw-r--r--compiler/luci-interpreter/src/kernels/TransposeConv.h58
-rw-r--r--compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp102
-rw-r--r--compiler/luci-interpreter/src/kernels/Unpack.cpp84
-rw-r--r--compiler/luci-interpreter/src/kernels/Unpack.h46
-rw-r--r--compiler/luci-interpreter/src/kernels/Unpack.test.cpp141
-rw-r--r--compiler/luci-interpreter/src/kernels/Utils.cpp182
-rw-r--r--compiler/luci-interpreter/src/kernels/Utils.h194
-rw-r--r--compiler/luci-interpreter/src/loader/CMakeLists.txt15
-rw-r--r--compiler/luci-interpreter/src/loader/GraphLoader.cpp205
-rw-r--r--compiler/luci-interpreter/src/loader/GraphLoader.h58
-rw-r--r--compiler/luci-interpreter/src/loader/KernelBuilder.cpp529
-rw-r--r--compiler/luci-interpreter/src/loader/KernelBuilder.h91
-rw-r--r--compiler/luci-interpreter/src/loader/ModuleLoader.cpp49
-rw-r--r--compiler/luci-interpreter/src/loader/ModuleLoader.h54
-rw-r--r--compiler/luci-interpreter/src/loader/RuntimeToIR.h38
109 files changed, 9857 insertions, 0 deletions
diff --git a/compiler/luci-interpreter/CMakeLists.txt b/compiler/luci-interpreter/CMakeLists.txt
new file mode 100644
index 000000000..33fdc52aa
--- /dev/null
+++ b/compiler/luci-interpreter/CMakeLists.txt
@@ -0,0 +1,4 @@
+set(LUCI_INTERPRETER_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include")
+set(LUCI_INTERPRETER_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src")
+
+add_subdirectory(src)
diff --git a/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h b/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h
new file mode 100644
index 000000000..7a14bf6f8
--- /dev/null
+++ b/compiler/luci-interpreter/include/luci_interpreter/Interpreter.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_INTERPRETER_H
+#define LUCI_INTERPRETER_INTERPRETER_H
+
+#include "luci_interpreter/core/Tensor.h"
+
+#include <luci/IR/Nodes/CircleInput.h>
+#include <luci/IR/Nodes/CircleOutput.h>
+
+#include <luci/IR/Module.h>
+
+#include <memory>
+#include <vector>
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class ExecutionObserver
+{
+public:
+ virtual ~ExecutionObserver();
+
+ // Called when the value of a tensor has been updated during execution.
+ virtual void postTensorWrite(const luci::CircleNode *node, const Tensor *tensor);
+
+ // Called before / after executing an operator.
+ // Note that these methods are not called for auxiliary operators (CircleInput, CircleOutput,
+ // CircleConst and Circle*Out).
+ virtual void preOperatorExecute(const luci::CircleNode *node);
+ virtual void postOperatorExecute(const luci::CircleNode *node);
+};
+
+class Interpreter
+{
+public:
+ explicit Interpreter(const luci::Module *module);
+
+ ~Interpreter();
+
+ void writeInputTensor(const luci::CircleInput *input_node, const void *data, size_t data_size);
+
+ void readOutputTensor(const luci::CircleOutput *output_node, void *data, size_t data_size);
+
+ void interpret();
+
+ void attachObserver(ExecutionObserver *observer);
+
+ const Tensor *getTensor(const loco::Node *node) { return _node_to_tensor[node]; }
+
+private:
+ std::unique_ptr<class RuntimeModule> _runtime_module;
+
+ // Observer functionality support.
+ std::unique_ptr<struct RuntimeToIR> _runtime_to_ir;
+ std::unordered_map<const loco::Node *, Tensor *> _node_to_tensor;
+ std::unique_ptr<class EventNotifier> _event_notifier;
+ std::vector<ExecutionObserver *> _observers;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_INTERPRETER_H
diff --git a/compiler/luci-interpreter/include/luci_interpreter/core/DataType.h b/compiler/luci-interpreter/include/luci_interpreter/core/DataType.h
new file mode 100644
index 000000000..27bf719b5
--- /dev/null
+++ b/compiler/luci-interpreter/include/luci_interpreter/core/DataType.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_DATATYPE_H
+#define LUCI_INTERPRETER_CORE_DATATYPE_H
+
+#include <loco/IR/DataType.h>
+#include <loco/IR/DataTypeTraits.h>
+
+#include <cstddef>
+
+namespace luci_interpreter
+{
+
+using DataType = loco::DataType;
+
+template <DataType DT> using DataTypeImpl = loco::DataTypeImpl<DT>;
+
+inline size_t getDataTypeSize(DataType data_type) { return loco::size(data_type); }
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_DATATYPE_H
diff --git a/compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h b/compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h
new file mode 100644
index 000000000..998789882
--- /dev/null
+++ b/compiler/luci-interpreter/include/luci_interpreter/core/Tensor.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_TENSOR_H
+#define LUCI_INTERPRETER_CORE_TENSOR_H
+
+#include "luci_interpreter/core/DataType.h"
+
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+class Shape
+{
+public:
+ explicit Shape(int rank) : _dims(rank, 0) {}
+
+ Shape(std::initializer_list<int32_t> dims) : _dims(dims.begin(), dims.end()) {}
+
+ int num_dims() const { return _dims.size(); }
+
+ int32_t dim(int i) const
+ {
+ assert(i >= 0 && i < static_cast<int>(_dims.size()));
+ return _dims[i];
+ }
+
+ int32_t &dim(int i)
+ {
+ assert(i >= 0 && i < static_cast<int>(_dims.size()));
+ return _dims[i];
+ }
+
+ int32_t num_elements() const
+ {
+ int32_t result = 1;
+ for (const int32_t dim : _dims)
+ {
+ result *= dim;
+ }
+ return result;
+ }
+
+ bool operator==(const Shape &other) const { return _dims == other._dims; }
+
+ bool operator!=(const Shape &other) const { return !operator==(other); }
+
+private:
+ std::vector<int32_t> _dims;
+};
+
+// Tensor affine quantization parameters.
+//
+// The relationship between real and quantized values:
+// real_value = (quantized_value - zero_point) * scale
+//
+// In per-tensor case, 'scale' and 'zero_point' are one element each.
+// In per-channel case, 'scale' and 'zero_point' are N elements each, where N is the size
+// of the quantized dimension.
+//
+// Note that due to historical and performance reasons, per-tensor quantization uses unsigned
+// integer types, while per-channel uses signed types assuming 'zero_point' == 0.
+//
+// TODO Add 'quantized_dimension' field for per-channel case when IR provides it.
+struct AffineQuantization
+{
+ std::vector<float> scale;
+ std::vector<int32_t> zero_point;
+};
+
+class Tensor
+{
+public:
+ Tensor(DataType element_type, Shape shape, AffineQuantization quantization, std::string name);
+
+ DataType element_type() const { return _element_type; }
+
+ const Shape &shape() const { return _shape; }
+
+ float scale() const
+ {
+ assert(_quantization.scale.size() == 1);
+ return _quantization.scale[0];
+ }
+
+ float zero_point() const
+ {
+ assert(_quantization.zero_point.size() == 1);
+ return _quantization.zero_point[0];
+ }
+
+ template <typename T> const T *data() const { return reinterpret_cast<const T *>(_data.get()); }
+
+ template <typename T> T *data() { return reinterpret_cast<T *>(_data.get()); }
+
+ const std::string &name() const { return _name; }
+
+ void readData(void *data_ptr, size_t data_size) const;
+
+ void writeData(const void *data_ptr, size_t data_size);
+
+ void resize(const Shape &new_shape);
+
+private:
+ DataType _element_type;
+ Shape _shape;
+ AffineQuantization _quantization;
+ std::unique_ptr<uint8_t[]> _data;
+ std::string _name;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_TENSOR_H
diff --git a/compiler/luci-interpreter/requires.cmake b/compiler/luci-interpreter/requires.cmake
new file mode 100644
index 000000000..f411f387a
--- /dev/null
+++ b/compiler/luci-interpreter/requires.cmake
@@ -0,0 +1 @@
+require(luci)
diff --git a/compiler/luci-interpreter/src/CMakeLists.txt b/compiler/luci-interpreter/src/CMakeLists.txt
new file mode 100644
index 000000000..6a66f1425
--- /dev/null
+++ b/compiler/luci-interpreter/src/CMakeLists.txt
@@ -0,0 +1,35 @@
+nnas_find_package(TensorFlowSource EXACT 2.1.0 QUIET)
+nnas_find_package(TensorFlowGEMMLowpSource EXACT 2.1.0 QUIET)
+nnas_find_package(TensorFlowEigenSource EXACT 2.1.0 QUIET)
+
+if (NOT TensorFlowSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: TensorFlow not found")
+ return()
+endif ()
+
+if (NOT TensorFlowGEMMLowpSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: gemmlowp not found")
+ return()
+endif ()
+
+if (NOT TensorFlowEigenSource_FOUND)
+ message(STATUS "Skipping luci-interpreter: Eigen not found")
+ return()
+endif ()
+
+add_subdirectory(core)
+add_subdirectory(kernels)
+add_subdirectory(loader)
+
+set(SOURCES
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/Interpreter.h"
+ Interpreter.cpp)
+
+add_library(luci_interpreter SHARED ${SOURCES})
+target_include_directories(luci_interpreter PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_include_directories(luci_interpreter PRIVATE "${LUCI_INTERPRETER_SOURCE_DIR}")
+target_link_libraries(luci_interpreter
+ PUBLIC luci_lang luci_interpreter_loader luci_interpreter_core
+ PRIVATE nncc_common)
+
+install(TARGETS luci_interpreter DESTINATION lib)
diff --git a/compiler/luci-interpreter/src/Interpreter.cpp b/compiler/luci-interpreter/src/Interpreter.cpp
new file mode 100644
index 000000000..639ffc1f0
--- /dev/null
+++ b/compiler/luci-interpreter/src/Interpreter.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/Interpreter.h"
+
+#include "loader/ModuleLoader.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace
+{
+
+class EventNotifierImpl final : public EventNotifier
+{
+public:
+ EventNotifierImpl(const RuntimeToIR &runtime_to_ir,
+ const std::vector<ExecutionObserver *> &observers)
+ : _runtime_to_ir(runtime_to_ir), _observers(observers)
+ {
+ }
+
+ void postTensorWrite(const Tensor *tensor) override
+ {
+ assert(tensor != nullptr);
+ for (const auto &observer : _observers)
+ {
+ observer->postTensorWrite(_runtime_to_ir.tensor_to_node.at(tensor), tensor);
+ }
+ }
+
+ void preOperatorExecute(const Kernel *kernel) override
+ {
+ assert(kernel != nullptr);
+ for (const auto &observer : _observers)
+ {
+ observer->preOperatorExecute(_runtime_to_ir.kernel_to_node.at(kernel));
+ }
+ }
+
+ void postOperatorExecute(const Kernel *kernel) override
+ {
+ assert(kernel != nullptr);
+ for (const auto &observer : _observers)
+ {
+ observer->postOperatorExecute(_runtime_to_ir.kernel_to_node.at(kernel));
+ }
+ }
+
+private:
+ const RuntimeToIR &_runtime_to_ir;
+ const std::vector<ExecutionObserver *> &_observers;
+};
+
+} // namespace
+
+Interpreter::Interpreter(const luci::Module *module)
+{
+ _runtime_to_ir = std::make_unique<RuntimeToIR>();
+ _event_notifier = std::make_unique<EventNotifierImpl>(*_runtime_to_ir, _observers);
+ _runtime_module = std::make_unique<RuntimeModule>(_event_notifier.get());
+ ModuleLoader loader(module, _runtime_module.get(), *_runtime_to_ir, _node_to_tensor);
+ loader.load();
+}
+
+Interpreter::~Interpreter() = default;
+
+void Interpreter::writeInputTensor(const luci::CircleInput *input_node, const void *data,
+ size_t data_size)
+{
+ Tensor *tensor = _runtime_module->getInputTensors()[input_node->index()];
+ if (tensor == nullptr)
+ {
+ const std::string &name = input_node->name();
+ throw std::runtime_error("Cannot find tensor for input node named \"" + name + "\".");
+ }
+ if (data != nullptr)
+ tensor->writeData(data, data_size);
+}
+
+void Interpreter::readOutputTensor(const luci::CircleOutput *output_node, void *data,
+ size_t data_size)
+{
+ Tensor *tensor = _runtime_module->getOutputTensors()[output_node->index()];
+ if (tensor == nullptr)
+ {
+ const std::string &name = output_node->name();
+ throw std::runtime_error("Cannot find tensor for output node named \"" + name + "\".");
+ }
+ if (data != nullptr)
+ tensor->readData(data, data_size);
+}
+
+void Interpreter::interpret() { _runtime_module->execute(); }
+
+void Interpreter::attachObserver(ExecutionObserver *observer)
+{
+ if (std::find(_observers.cbegin(), _observers.cend(), observer) != _observers.cend())
+ throw std::runtime_error("Observer is already attached.");
+ _observers.push_back(observer);
+}
+
+ExecutionObserver::~ExecutionObserver() = default;
+
+void ExecutionObserver::postTensorWrite(const luci::CircleNode *, const Tensor *) {}
+
+void ExecutionObserver::preOperatorExecute(const luci::CircleNode *) {}
+
+void ExecutionObserver::postOperatorExecute(const luci::CircleNode *) {}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/core/CMakeLists.txt b/compiler/luci-interpreter/src/core/CMakeLists.txt
new file mode 100644
index 000000000..e576dbd94
--- /dev/null
+++ b/compiler/luci-interpreter/src/core/CMakeLists.txt
@@ -0,0 +1,17 @@
+set(SOURCES
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/DataType.h"
+ "${LUCI_INTERPRETER_INCLUDE_DIR}/luci_interpreter/core/Tensor.h"
+ EventNotifier.h
+ Kernel.h
+ KernelParams.h
+ RuntimeGraph.h
+ RuntimeGraph.cpp
+ RuntimeModule.h
+ Tensor.cpp)
+
+add_library(luci_interpreter_core STATIC ${SOURCES})
+set_target_properties(luci_interpreter_core PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(luci_interpreter_core PUBLIC "${LUCI_INTERPRETER_INCLUDE_DIR}")
+target_include_directories(luci_interpreter_core PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
+target_link_libraries(luci_interpreter_core PUBLIC luci_lang)
+target_link_libraries(luci_interpreter_core PRIVATE nncc_common)
diff --git a/compiler/luci-interpreter/src/core/EventNotifier.h b/compiler/luci-interpreter/src/core/EventNotifier.h
new file mode 100644
index 000000000..5c4fbd3be
--- /dev/null
+++ b/compiler/luci-interpreter/src/core/EventNotifier.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_EVENTNOTIFIER_H
+#define LUCI_INTERPRETER_CORE_EVENTNOTIFIER_H
+
+namespace luci_interpreter
+{
+
+// Used at execution stage to tell the interpreter that the runtime state has changed in some way.
+class EventNotifier
+{
+public:
+ virtual ~EventNotifier() = default;
+
+ virtual void postTensorWrite(const Tensor *tensor) = 0;
+ virtual void preOperatorExecute(const Kernel *kernel) = 0;
+ virtual void postOperatorExecute(const Kernel *kernel) = 0;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_EVENTNOTIFIER_H
diff --git a/compiler/luci-interpreter/src/core/Kernel.h b/compiler/luci-interpreter/src/core/Kernel.h
new file mode 100644
index 000000000..5f5efb219
--- /dev/null
+++ b/compiler/luci-interpreter/src/core/Kernel.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_KERNEL_H
+#define LUCI_INTERPRETER_CORE_KERNEL_H
+
+#include "luci_interpreter/core/Tensor.h"
+
+#include <vector>
+
+namespace luci_interpreter
+{
+
+// Base class for all kernels.
+class Kernel
+{
+protected:
+ Kernel(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs)
+ : _inputs(std::move(inputs)), _outputs(std::move(outputs))
+ {
+ }
+
+public:
+ virtual ~Kernel() = default;
+
+ std::vector<const Tensor *> getInputTensors() const { return _inputs; }
+ std::vector<Tensor *> getOutputTensors() const { return _outputs; }
+
+ // Configures the kernel.
+ // This function is currently called once for each kernel during interpreter construction,
+ // which makes it a convenient place for preparing (resizing) output tensors.
+ virtual void configure() = 0;
+
+ // Executes the kernel.
+ virtual void execute() const = 0;
+
+protected:
+ // NOTE Prefer not to use these in derived classes.
+ const std::vector<const Tensor *> _inputs;
+ const std::vector<Tensor *> _outputs;
+};
+
+// Base class for kernels with parameters.
+template <typename Params> class KernelWithParams : public Kernel
+{
+protected:
+ KernelWithParams(std::vector<const Tensor *> inputs, std::vector<Tensor *> outputs,
+ const Params &params)
+ : Kernel(std::move(inputs), std::move(outputs)), _params(params)
+ {
+ }
+
+public:
+ const Params &params() const { return _params; }
+
+protected:
+ const Params _params;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_KERNEL_H
diff --git a/compiler/luci-interpreter/src/core/KernelParams.h b/compiler/luci-interpreter/src/core/KernelParams.h
new file mode 100644
index 000000000..a32e0d4a5
--- /dev/null
+++ b/compiler/luci-interpreter/src/core/KernelParams.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_KERNELPARAMS_H
+#define LUCI_INTERPRETER_CORE_KERNELPARAMS_H
+
+#include <luci/IR/AttrPadding.h>
+#include <luci/IR/AttrFusedActFunc.h>
+#include <luci_interpreter/core/DataType.h>
+
+#include <cstdint>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+// Inject commonly used types into `luci_interpreter` namespace for convenience.
+using Activation = luci::FusedActFunc;
+using Padding = luci::Padding;
+
+struct AddParams
+{
+ Activation activation;
+};
+
+struct ArgMaxParams
+{
+ DataType output_type;
+};
+
+struct ConcatenationParams
+{
+ int axis;
+};
+
+struct Conv2DParams
+{
+ Padding padding;
+ int32_t stride_height;
+ int32_t stride_width;
+ int32_t dilation_height_factor;
+ int32_t dilation_width_factor;
+ Activation activation;
+};
+
+struct DepthwiseConv2DParams
+{
+ Padding padding;
+ int32_t depth_multiplier; // TODO Remove, as it can be calculated.
+ int32_t stride_height;
+ int32_t stride_width;
+ int32_t dilation_height_factor;
+ int32_t dilation_width_factor;
+ Activation activation;
+};
+
+struct FullyConnectedParams
+{
+ Activation activation;
+};
+
+struct L2NormParams
+{
+ Activation activation;
+};
+
+struct LeakyReluParams
+{
+ float alpha;
+};
+
+struct LocalResponseNormalizationParams
+{
+ int32_t radius;
+ float bias;
+ float alpha;
+ float beta;
+};
+
+struct MulParams
+{
+ Activation activation;
+};
+
+struct Pool2DParams
+{
+ Padding padding;
+ int32_t filter_height;
+ int32_t filter_width;
+ int32_t stride_height;
+ int32_t stride_width;
+ Activation activation;
+};
+
+struct ReducerParams
+{
+ bool keep_dims;
+};
+
+struct SpaceToDepthParams
+{
+ int block_size;
+};
+
+struct SoftmaxParams
+{
+ float beta;
+};
+
+struct StridedSliceParams
+{
+ int32_t begin_mask;
+ int32_t end_mask;
+ int32_t ellipsis_mask;
+ int32_t new_axis_mask;
+ int32_t shrink_axis_mask;
+};
+
+struct SqueezeParams
+{
+ std::vector<int32_t> squeeze_dims;
+};
+
+struct TransposeConvParams
+{
+ Padding padding;
+ int32_t stride_height;
+ int32_t stride_width;
+};
+
+struct UnpackParams
+{
+ int axis;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_KERNELPARAMS_H
diff --git a/compiler/luci-interpreter/src/core/RuntimeGraph.cpp b/compiler/luci-interpreter/src/core/RuntimeGraph.cpp
new file mode 100644
index 000000000..06f0fed15
--- /dev/null
+++ b/compiler/luci-interpreter/src/core/RuntimeGraph.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "core/RuntimeGraph.h"
+
+#include "core/RuntimeModule.h"
+
+#include <algorithm>
+
+namespace luci_interpreter
+{
+
+Tensor *RuntimeGraph::addTensor(std::unique_ptr<Tensor> &&tensor)
+{
+ assert(tensor != nullptr);
+ _tensors.push_back(std::move(tensor));
+ return _tensors.back().get();
+}
+
+void RuntimeGraph::setInputTensors(const std::vector<Tensor *> &input_tensors)
+{
+ assert(std::all_of(input_tensors.cbegin(), input_tensors.cend(),
+ [](Tensor *tensor) { return tensor != nullptr; }));
+ _input_tensors = input_tensors;
+}
+
+void RuntimeGraph::setOutputTensors(const std::vector<Tensor *> &output_tensors)
+{
+ assert(std::all_of(output_tensors.cbegin(), output_tensors.cend(),
+ [](Tensor *tensor) { return tensor != nullptr; }));
+ _output_tensors = output_tensors;
+}
+
+void RuntimeGraph::addKernel(std::unique_ptr<Kernel> &&kernel)
+{
+ assert(kernel != nullptr);
+ _kernels.push_back(std::move(kernel));
+}
+
+void RuntimeGraph::execute() const
+{
+ EventNotifier *event_notifier = _owning_module->getEventNotifier();
+
+ // Notify the observers that the input tensors have changed.
+ if (event_notifier != nullptr)
+ {
+ for (const Tensor *input_tensor : getInputTensors())
+ {
+ event_notifier->postTensorWrite(input_tensor);
+ }
+ }
+
+ for (const auto &kernel : _kernels)
+ {
+ if (event_notifier != nullptr)
+ {
+ event_notifier->preOperatorExecute(kernel.get());
+ }
+
+ // TODO The `configure` method should only be called if the outputs of an operator need to be
+ // resized.
+ kernel->configure();
+ kernel->execute();
+
+ if (event_notifier != nullptr)
+ {
+ event_notifier->postOperatorExecute(kernel.get());
+ }
+
+ for (const Tensor *tensor : kernel->getOutputTensors())
+ {
+ if (event_notifier != nullptr)
+ {
+ event_notifier->postTensorWrite(tensor);
+ }
+ }
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/core/RuntimeGraph.h b/compiler/luci-interpreter/src/core/RuntimeGraph.h
new file mode 100644
index 000000000..6ddbea4e9
--- /dev/null
+++ b/compiler/luci-interpreter/src/core/RuntimeGraph.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
+#define LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
+
+#include "luci_interpreter/core/Tensor.h"
+#include "core/Kernel.h"
+
+#include <memory>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+class RuntimeModule;
+
// A RuntimeGraph owns the tensors and kernels of one (sub)graph and runs the
// kernels in their recorded order. It is created by a RuntimeModule, which it
// consults for the (optional) EventNotifier during execution.
class RuntimeGraph
{
public:
  explicit RuntimeGraph(RuntimeModule *owning_module) : _owning_module(owning_module) {}

  // Takes ownership of `tensor`; returns a non-owning pointer to it.
  Tensor *addTensor(std::unique_ptr<Tensor> &&tensor);

  // Registers the graph's input/output tensors; none of the pointers may be null.
  void setInputTensors(const std::vector<Tensor *> &input_tensors);
  void setOutputTensors(const std::vector<Tensor *> &output_tensors);

  const std::vector<Tensor *> &getInputTensors() const { return _input_tensors; }
  const std::vector<Tensor *> &getOutputTensors() const { return _output_tensors; }

  // Takes ownership of `kernel`. Kernels must be added in execution order.
  void addKernel(std::unique_ptr<Kernel> &&kernel);

  // Configures and executes every kernel in order, firing observer events
  // through the owning module's EventNotifier (if any).
  void execute() const;

private:
  RuntimeModule *_owning_module;
  std::vector<std::unique_ptr<Tensor>> _tensors;
  std::vector<Tensor *> _input_tensors;
  std::vector<Tensor *> _output_tensors;

  // Kernels in execution order.
  std::vector<std::unique_ptr<Kernel>> _kernels;
};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_RUNTIMEGRAPH_H
diff --git a/compiler/luci-interpreter/src/core/RuntimeModule.h b/compiler/luci-interpreter/src/core/RuntimeModule.h
new file mode 100644
index 000000000..dccc3a173
--- /dev/null
+++ b/compiler/luci-interpreter/src/core/RuntimeModule.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
+#define LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
+
+#include "core/RuntimeGraph.h"
+#include "core/EventNotifier.h"
+
+#include <memory>
+#include <vector>
+
+namespace luci_interpreter
+{
+
// A RuntimeModule owns the RuntimeGraphs of a loaded model. The first graph
// added via `addGraph` is treated as the "main" graph whose inputs, outputs
// and execution the module exposes.
class RuntimeModule
{
public:
  // `event_notifier` may be null (no observer callbacks); it is not owned.
  explicit RuntimeModule(EventNotifier *event_notifier) : _event_notifier(event_notifier) {}

  EventNotifier *getEventNotifier() const { return _event_notifier; }

  // Creates a graph owned by this module; returns a non-owning pointer to it.
  RuntimeGraph *addGraph()
  {
    _graphs.push_back(std::make_unique<RuntimeGraph>(this));
    return _graphs.back().get();
  }

  const std::vector<Tensor *> &getInputTensors() const { return getMainGraph()->getInputTensors(); }
  const std::vector<Tensor *> &getOutputTensors() const
  {
    return getMainGraph()->getOutputTensors();
  }

  void execute() const { getMainGraph()->execute(); }

private:
  // NOTE(review): `_graphs[0]` on an empty vector is undefined behavior —
  // callers must add the main graph before using the accessors above.
  RuntimeGraph *getMainGraph() const { return _graphs[0].get(); }

  EventNotifier *const _event_notifier;
  std::vector<std::unique_ptr<RuntimeGraph>> _graphs;
};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_CORE_RUNTIMEMODULE_H
diff --git a/compiler/luci-interpreter/src/core/Tensor.cpp b/compiler/luci-interpreter/src/core/Tensor.cpp
new file mode 100644
index 000000000..4fe7479e5
--- /dev/null
+++ b/compiler/luci-interpreter/src/core/Tensor.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luci_interpreter/core/Tensor.h"
+
+#include <cstring>
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+Tensor::Tensor(DataType element_type, Shape shape, AffineQuantization quantization,
+ std::string name)
+ : _element_type(element_type), _shape(std::move(shape)), _quantization(std::move(quantization)),
+ _name(std::move(name))
+{
+ const size_t element_size = getDataTypeSize(_element_type);
+ const int32_t num_elements = _shape.num_elements();
+ _data = std::make_unique<uint8_t[]>(num_elements * element_size);
+}
+
+void Tensor::readData(void *data_ptr, size_t data_size) const
+{
+ const size_t element_size = getDataTypeSize(element_type());
+ const int32_t num_elements = shape().num_elements();
+ if (data_size != num_elements * element_size)
+ {
+ throw std::invalid_argument("Invalid data size.");
+ }
+ assert(data_ptr != nullptr);
+ std::memcpy(data_ptr, data<void>(), data_size);
+}
+
+void Tensor::writeData(const void *data_ptr, size_t data_size)
+{
+ const size_t element_size = getDataTypeSize(element_type());
+ const int32_t num_elements = shape().num_elements();
+ if (data_size != num_elements * element_size)
+ {
+ throw std::invalid_argument("Invalid data size.");
+ }
+ assert(data_ptr != nullptr);
+ std::memcpy(data<void>(), data_ptr, data_size);
+}
+
+void Tensor::resize(const Shape &new_shape)
+{
+ _shape = new_shape;
+ const size_t element_size = getDataTypeSize(_element_type);
+ const int32_t num_elements = _shape.num_elements();
+ // NOTE: _data can be nullptr for empty tensors
+ _data = std::make_unique<uint8_t[]>(num_elements * element_size);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Add.cpp b/compiler/luci-interpreter/src/kernels/Add.cpp
new file mode 100644
index 000000000..9b9334792
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Add.cpp
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Add.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/add.h>
+#include <tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Binds the two input tensors and the output tensor; `params` carries the
// fused activation applied to the sum during execution.
Add::Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddParams &params)
    : KernelWithParams<AddParams>({input1, input2}, {output}, params)
{
}
+
void Add::configure()
{
  // Both inputs must share an element type; the output is resized to the shape
  // obtained by broadcasting the two input shapes against each other.
  assert(input1()->element_type() == input2()->element_type());
  output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
}
+
+void Add::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
void Add::evalFloat() const
{
  // Translate the fused activation into a [min, max] clamp applied to the sum.
  float activation_min{};
  float activation_max{};
  calculateActivationRange(_params.activation, &activation_min, &activation_max);

  tflite::ArithmeticParams params{};
  params.float_activation_min = activation_min;
  params.float_activation_max = activation_max;

  // ProcessBroadcastShapes fills broadcast info into `params` and reports
  // whether the two input shapes require broadcasting at all.
  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
      getTensorShape(input1()), getTensorShape(input2()), &params);

  if (need_broadcast)
  {
    tflite::reference_ops::BroadcastAdd4DSlow(
        params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
        getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
  }
  else
  {
    // Fast path: shapes match element-for-element, no broadcasting needed.
    tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<float>(input1()),
                               getTensorShape(input2()), getTensorData<float>(input2()),
                               getTensorShape(output()), getTensorData<float>(output()));
  }
}
+
void Add::evalQuantized() const
{
  const auto input1_scale = static_cast<double>(input1()->scale());
  const auto input2_scale = static_cast<double>(input2()->scale());
  const auto output_scale = static_cast<double>(output()->scale());

  // Both inputs are rescaled to a common scale (twice the larger input scale);
  // the left shift of 20 bits keeps precision during the integer arithmetic.
  const int left_shift = 20;
  const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale);
  const double real_input1_multiplier = input1_scale / twice_max_input_scale;
  const double real_input2_multiplier = input2_scale / twice_max_input_scale;
  const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale);

  // Convert each real-valued multiplier into a quantized multiplier/shift pair.
  int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{};
  int input1_shift{}, input2_shift{}, output_shift{};
  quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift);
  quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift);
  quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift);

  // The fused activation becomes a clamp in the quantized output domain.
  int32_t activation_min{};
  int32_t activation_max{};
  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);

  tflite::ArithmeticParams params{};
  params.left_shift = left_shift;
  // The kernel expects inputs' zero points to be negated.
  params.input1_offset = -input1()->zero_point(); // Note the '-'.
  params.input1_multiplier = input1_multiplier;
  params.input1_shift = input1_shift;
  params.input2_offset = -input2()->zero_point(); // Note the '-'.
  params.input2_multiplier = input2_multiplier;
  params.input2_shift = input2_shift;
  params.output_offset = output()->zero_point();
  params.output_multiplier = output_multiplier;
  params.output_shift = output_shift;
  params.quantized_activation_min = activation_min;
  params.quantized_activation_max = activation_max;

  // Same broadcast decision as the float path, but on uint8 data.
  const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
      getTensorShape(input1()), getTensorShape(input2()), &params);

  if (need_broadcast)
  {
    tflite::reference_ops::BroadcastAdd4DSlow(
        params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
        getTensorShape(input2()), getTensorData<uint8_t>(input2()), getTensorShape(output()),
        getTensorData<uint8_t>(output()));
  }
  else
  {
    tflite::reference_ops::Add(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()),
                               getTensorShape(input2()), getTensorData<uint8_t>(input2()),
                               getTensorShape(output()), getTensorData<uint8_t>(output()));
  }
}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Add.h b/compiler/luci-interpreter/src/kernels/Add.h
new file mode 100644
index 000000000..a1f7e0406
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Add.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_ADD_H
+#define LUCI_INTERPRETER_KERNELS_ADD_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Element-wise addition kernel. Supports FLOAT32 and quantized U8 inputs and
// broadcasts the two input shapes against each other.
class Add : public KernelWithParams<AddParams>
{
public:
  Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddParams &params);

  const Tensor *input1() const { return _inputs[0]; }
  const Tensor *input2() const { return _inputs[1]; }
  Tensor *output() const { return _outputs[0]; }

  // Checks input types agree and resizes the output to the broadcast shape.
  void configure() override;
  void execute() const override;

private:
  void evalFloat() const;
  void evalQuantized() const;
};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_ADD_H
diff --git a/compiler/luci-interpreter/src/kernels/Add.test.cpp b/compiler/luci-interpreter/src/kernels/Add.test.cpp
new file mode 100644
index 000000000..54e1cc672
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Add.test.cpp
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Add.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
// for quantized Add, the error shouldn't exceed step
float GetTolerance(float min, float max)
{
  // One quantization step over [min, max] mapped onto the 8-bit range.
  return (max - min) / 255.0;
}
+
+TEST(AddTest, Uint8)
+{
+ std::initializer_list<int32_t> base_shape = {2, 3, 1, 2};
+ std::initializer_list<float> base_data = {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
+ 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
+ std::initializer_list<int32_t> test_shapes[] = {
+ {1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ std::initializer_list<float> test_data = {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ std::initializer_list<int32_t> output_shapes[] = {
+ {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}};
+ std::vector<std::vector<float>> output_data = {
+ {-0.1f, 2.6f, -0.7f, 2.8f, 0.7f, 3.0f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
+ 1.0f, -0.8f, 0.4f, -0.6f, 1.8f, -0.2f, 1.4f, 3.0f, 0.8f, 3.0f, 2.2f, 3.0f,
+ -1.4f, 0.3f, -2.0f, 0.5f, -0.6f, 0.9f, 0.9f, -1.9f, 0.3f, -1.7f, 1.7f, -1.3f},
+ {-0.1f, 2.6f, 0.5f, 1.0f, 1.8f, -0.2f, 1.4f, 3.0f, -2.0f, 0.5f, 1.7f, -1.3f},
+ {-0.1f, 2.5f, 0.0f, 2.6f, -0.7f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
+ 1.0f, -0.9f, 1.1f, -0.8f, 0.4f, -1.5f, 1.7f, 3.0f, 2.2f, 3.0f, 2.1f, 3.0f,
+ -1.1f, 0.5f, -0.6f, 1.0f, -0.7f, 0.9f, 1.2f, -1.7f, 1.7f, -1.2f, 1.6f, -1.3f},
+ {-0.1f, 2.5f, 1.2f, 0.8f, 0.4f, -1.5f, 1.7f, 3.0f, -0.6f, 1.0f, 1.6f, -1.3f}};
+ float kQuantizedTolerance = GetTolerance(-3.f, 3.f);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-3.f, 3.f);
+ for (int i = 0; i < output_data.size(); i++)
+ {
+ Tensor input1_tensor{
+ getElementType<uint8_t>(), base_shape, {{quant_param.first}, {quant_param.second}}, ""};
+ Tensor input2_tensor{
+ getElementType<uint8_t>(), test_shapes[i], {{quant_param.first}, {quant_param.second}}, ""};
+ std::vector<uint8_t> quantized_input1_value =
+ quantize<uint8_t>(base_data, quant_param.first, quant_param.second);
+ std::vector<uint8_t> quantized_input2_value =
+ quantize<uint8_t>(test_data, quant_param.first, quant_param.second);
+ input1_tensor.writeData(quantized_input1_value.data(),
+ quantized_input1_value.size() * sizeof(uint8_t));
+ input2_tensor.writeData(quantized_input2_value.data(),
+ quantized_input2_value.size() * sizeof(uint8_t));
+ Tensor output_tensor =
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+
+ AddParams params{};
+ params.activation = Activation::NONE;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor),
+ output_tensor.scale(), output_tensor.zero_point()),
+ ElementsAreArray(ArrayFloatNear(output_data[i], kQuantizedTolerance)));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i]));
+ }
+ // Re-run with exchanged inputs.
+ for (int i = 0; i < output_data.size(); i++)
+ {
+ Tensor input1_tensor{
+ getElementType<uint8_t>(), test_shapes[i], {{quant_param.first}, {quant_param.second}}, ""};
+ Tensor input2_tensor{
+ getElementType<uint8_t>(), base_shape, {{quant_param.first}, {quant_param.second}}, ""};
+ std::vector<uint8_t> quantized_input1_value =
+ quantize<uint8_t>(test_data, quant_param.first, quant_param.second);
+ std::vector<uint8_t> quantized_input2_value =
+ quantize<uint8_t>(base_data, quant_param.first, quant_param.second);
+ input1_tensor.writeData(quantized_input1_value.data(),
+ quantized_input1_value.size() * sizeof(uint8_t));
+ input2_tensor.writeData(quantized_input2_value.data(),
+ quantized_input2_value.size() * sizeof(uint8_t));
+ Tensor output_tensor =
+ makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second);
+
+ AddParams params{};
+ params.activation = Activation::NONE;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor),
+ output_tensor.scale(), output_tensor.zero_point()),
+ ElementsAreArray(ArrayFloatNear(output_data[i], kQuantizedTolerance)));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i]));
+ }
+}
+
+TEST(AddTest, Float)
+{
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ std::vector<std::vector<float>> test_outputs = {
+ {0.0f, 2.6f, 0.0f, 2.8f, 0.7f, 3.2f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f,
+ 1.0f, 0.0f, 0.4f, 0.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.8f, 3.3f, 2.2f, 3.7f,
+ 0.0f, 0.3f, 0.0f, 0.5f, 0.0f, 0.9f, 0.9f, 0.0f, 0.3f, 0.0f, 1.7f, 0.0f},
+ {0.0f, 2.6f, 0.5f, 1.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.0f, 0.5f, 1.7f, 0.0f},
+ {0.0f, 2.5f, 0.0f, 2.6f, 0.0f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f,
+ 1.0f, 0.0f, 1.1f, 0.0f, 0.4f, 0.0f, 1.7f, 3.3f, 2.2f, 3.8f, 2.1f, 3.7f,
+ 0.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.9f, 1.2f, 0.0f, 1.7f, 0.0f, 1.6f, 0.0f},
+ {0.0f, 2.5f, 1.2f, 0.8f, 0.4f, 0.0f, 1.7f, 3.3f, 0.0f, 1.0f, 1.6f, 0.0f}};
+ std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
+ 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
+ std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data);
+ Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ArrayFloatNear(test_outputs[i], 0.0001f)))
+ << "With shape number " << i;
+ }
+ // Re-run with exchanged inputs.
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data);
+ Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ AddParams params{};
+ params.activation = Activation::RELU;
+
+ Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ArrayFloatNear(test_outputs[i], 0.0001f)))
+ << "With shape number " << i;
+ }
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/ArgMax.cpp b/compiler/luci-interpreter/src/kernels/ArgMax.cpp
new file mode 100644
index 000000000..5c464ed09
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ArgMax.cpp
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ArgMax.h"
+#include "kernels/Utils.h"
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// `axis` is a single-element tensor selecting the dimension to reduce over;
// `params.output_type` fixes the output index type (S32 or S64).
ArgMax::ArgMax(const Tensor *input, const Tensor *axis, Tensor *output, const ArgMaxParams &params)
    : KernelWithParams<ArgMaxParams>({input, axis}, {output}, params)
{
}
+
+void ArgMax::configure()
+{
+ assert(axis()->element_type() == DataType::S32 || axis()->element_type() == DataType::S64);
+ assert(input()->shape().num_dims() >= 1);
+ const Shape &input_shape = input()->shape();
+ const int num_dims = input_shape.num_dims();
+ Shape output_shape(num_dims - 1);
+
+ // If axis value is negative, then update by adding input_shape's num_dims.
+ // If updated value also negative, then assert.
+ assert(axis()->shape().num_elements() == 1);
+ int axis_value = getTensorData<int32_t>(axis())[0];
+ if (axis_value < 0)
+ axis_value = axis_value + num_dims;
+ assert(axis_value >= 0);
+
+ int j = 0;
+ for (int i = 0; i < num_dims; i++)
+ {
+ if (i == axis_value)
+ continue;
+ output_shape.dim(j++) = input_shape.dim(i);
+ }
+
+ assert(output()->element_type() == _params.output_type);
+
+ output()->resize(output_shape);
+}
+
void ArgMax::execute() const
{
// Instantiates the TFLite ArgMinMax reference for one combination of
// (input element type, axis index type, output index type); std::greater
// selects the arg-MAX behavior of the shared ArgMinMax implementation.
// NOTE(review): std::greater / std::runtime_error rely on transitive includes
// here — consider including <functional> and <stdexcept> directly.
#define TF_LITE_ARG_MAX(data_type, axis_type, output_type) \
  tflite::optimized_ops::ArgMinMax(getTensorShape(input()), getTensorData<data_type>(input()), \
                                   getTensorData<axis_type>(axis()), getTensorShape(output()), \
                                   getTensorData<output_type>(output()), \
                                   std::greater<data_type>())
  // Dispatch on (axis type) x (output type) x (input type); any combination
  // outside {S32,S64} x {S32,S64} x {FLOAT32,U8} is rejected at runtime.
  if (axis()->element_type() == DataType::S32)
  {
    switch (_params.output_type)
    {
      case DataType::S32:
        switch (input()->element_type())
        {
          case DataType::FLOAT32:
            TF_LITE_ARG_MAX(float, int32_t, int32_t);
            break;
          case DataType::U8:
            TF_LITE_ARG_MAX(uint8_t, int32_t, int32_t);
            break;
          default:
            throw std::runtime_error("Unsupported input type.");
        }
        break;
      case DataType::S64:
        switch (input()->element_type())
        {
          case DataType::FLOAT32:
            TF_LITE_ARG_MAX(float, int32_t, int64_t);
            break;
          case DataType::U8:
            TF_LITE_ARG_MAX(uint8_t, int32_t, int64_t);
            break;
          default:
            throw std::runtime_error("Unsupported input type.");
        }
        break;
      default:
        throw std::runtime_error("Unsupported output type.");
    }
  }
  else
  {
    // S64 axis tensor: same dispatch with int64_t axis indices.
    switch (_params.output_type)
    {
      case DataType::S32:
        switch (input()->element_type())
        {
          case DataType::FLOAT32:
            TF_LITE_ARG_MAX(float, int64_t, int32_t);
            break;
          case DataType::U8:
            TF_LITE_ARG_MAX(uint8_t, int64_t, int32_t);
            break;
          default:
            throw std::runtime_error("Unsupported input type.");
        }
        break;
      case DataType::S64:
        switch (input()->element_type())
        {
          case DataType::FLOAT32:
            TF_LITE_ARG_MAX(float, int64_t, int64_t);
            break;
          case DataType::U8:
            TF_LITE_ARG_MAX(uint8_t, int64_t, int64_t);
            break;
          default:
            throw std::runtime_error("Unsupported input type.");
        }
        break;
      default:
        throw std::runtime_error("Unsupported output type.");
    }
  }
#undef TF_LITE_ARG_MAX
}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/ArgMax.h b/compiler/luci-interpreter/src/kernels/ArgMax.h
new file mode 100644
index 000000000..c851b5891
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ArgMax.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_ARGMAX_H
+#define LUCI_INTERPRETER_KERNELS_ARGMAX_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// ArgMax kernel: for each slice along the given axis, outputs the index of the
// maximum element. Supports FLOAT32/U8 input, S32/S64 axis and output types.
class ArgMax : public KernelWithParams<ArgMaxParams>
{
public:
  ArgMax(const Tensor *input, const Tensor *axis, Tensor *output, const ArgMaxParams &params);

  const Tensor *input() const { return _inputs[0]; }
  const Tensor *axis() const { return _inputs[1]; }
  Tensor *output() const { return _outputs[0]; }

  // Validates the axis scalar and shapes the output (input minus reduced dim).
  void configure() override;
  void execute() const override;
};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_ARGMAX_H
diff --git a/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp b/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp
new file mode 100644
index 000000000..5ac3b2f7a
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/ArgMax.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T1, typename T2>
+void Check(std::initializer_list<int32_t> input_shape,
+ std::initializer_list<int32_t> dimension_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<T1> input_data,
+ std::initializer_list<int32_t> dimension_data, std::initializer_list<T2> output_data)
+{
+
+ Tensor input_tensor{getElementType<T1>(), input_shape, {}, ""};
+ input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(T1));
+ Tensor dimension_tensor{DataType::S32, dimension_shape, {}, ""};
+ dimension_tensor.writeData(dimension_data.begin(), dimension_data.size() * sizeof(int32_t));
+
+ Tensor output_tensor = makeOutputTensor(getElementType<T2>());
+
+ ArgMaxParams params{};
+ params.output_type = getElementType<T2>();
+ ArgMax kernel(&input_tensor, &dimension_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T2>(output_tensor), ::testing::ElementsAreArray(output_data));
+ EXPECT_THAT(extractTensorShape(output_tensor), output_shape);
+}
+
// Typed fixture: each TYPED_TEST below is instantiated once per element type.
template <typename T> class ArgMaxTest : public ::testing::Test
{
};

// Input element types under test: float and quantized uint8.
using DataTypes = ::testing::Types<float, uint8_t>;
TYPED_TEST_CASE(ArgMaxTest, DataTypes);
+
// ArgMax over the last axis of a single 4-element row: the maximum (9) sits at
// index 1. Checked for both S32 and S64 output index types.
TYPED_TEST(ArgMaxTest, Simple)
{
  Check<TypeParam, int32_t>(/*input_shape=*/{1, 1, 1, 4}, /*dimension_shape=*/{},
                            /*output_shape=*/{1, 1, 1},
                            /*input_data=*/
                            {
                                1, 9, 7, 3,
                            },
                            /*dimension_data=*/{3}, /*output_data=*/{1});
  Check<TypeParam, int64_t>(/*input_shape=*/{1, 1, 1, 4}, /*dimension_shape=*/{},
                            /*output_shape=*/{1, 1, 1},
                            /*input_data=*/
                            {
                                1, 9, 7, 3,
                            },
                            /*dimension_data=*/{3}, /*output_data=*/{1});
}
+
// ArgMax over the last axis of two 4-element rows: row {1,2,7,8} peaks at
// index 3, row {1,9,7,3} at index 1. Checked for S32 and S64 output types.
TYPED_TEST(ArgMaxTest, MultiDimensions)
{
  Check<TypeParam, int32_t>(/*input_shape=*/{1, 1, 2, 4}, /*dimension_shape=*/{},
                            /*output_shape=*/{1, 1, 2},
                            /*input_data=*/
                            {
                                1, 2, 7, 8, 1, 9, 7, 3,
                            },
                            /*dimension_data=*/{3}, /*output_data=*/{3, 1});
  Check<TypeParam, int64_t>(/*input_shape=*/{1, 1, 2, 4}, /*dimension_shape=*/{},
                            /*output_shape=*/{1, 1, 2},
                            /*input_data=*/
                            {
                                1, 2, 7, 8, 1, 9, 7, 3,
                            },
                            /*dimension_data=*/{3}, /*output_data=*/{3, 1});
}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp b/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp
new file mode 100644
index 000000000..6d1b8ead4
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/AveragePool2D.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
// Registers the single input and single output tensor with the base kernel;
// pooling hyper-parameters (padding, filter, strides, activation) travel in
// 'params'.
AveragePool2D::AveragePool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
    : KernelWithParams<Pool2DParams>({input}, {output}, params)
{
}
+
+void AveragePool2D::configure()
+{
+ const Shape &input_shape = input()->shape();
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t depth = input_shape.dim(3);
+
+ const int32_t output_height = computeOutputSize(_params.padding, input_height,
+ _params.filter_height, _params.stride_height);
+ const int32_t output_width =
+ computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width);
+
+ _padding_height =
+ computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
+ _padding_width =
+ computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
+
+ output()->resize({batches, output_height, output_width, depth});
+}
+
+void AveragePool2D::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void AveragePool2D::evalFloat() const
+{
+ float activation_min{};
+ float activation_max{};
+ calculateActivationRange(_params.activation, &activation_min, &activation_max);
+
+ tflite::PoolParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.filter_height = _params.filter_height;
+ params.filter_width = _params.filter_width;
+ params.float_activation_min = activation_min;
+ params.float_activation_max = activation_max;
+
+ tflite::reference_ops::AveragePool(params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
// Runs the TFLite uint8 reference implementation of average pooling.
// NOTE(review): the reference kernel averages raw uint8 values without
// rescaling, which presumably assumes input and output share quantization
// parameters — confirm against the importer/validator.
void AveragePool2D::evalQuantized() const
{
  int32_t activation_min{};
  int32_t activation_max{};
  // Clamp bounds derive from the fused activation and the output's
  // quantization parameters.
  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);

  tflite::PoolParams params{};
  params.padding_values.height = _padding_height;
  params.padding_values.width = _padding_width;
  params.stride_height = _params.stride_height;
  params.stride_width = _params.stride_width;
  params.filter_height = _params.filter_height;
  params.filter_width = _params.filter_width;
  params.quantized_activation_min = activation_min;
  params.quantized_activation_max = activation_max;

  tflite::reference_ops::AveragePool(params, getTensorShape(input()),
                                     getTensorData<uint8_t>(input()), getTensorShape(output()),
                                     getTensorData<uint8_t>(output()));
}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.h b/compiler/luci-interpreter/src/kernels/AveragePool2D.h
new file mode 100644
index 000000000..91f212b3a
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_AVERAGEPOOL2D_H
+#define LUCI_INTERPRETER_KERNELS_AVERAGEPOOL2D_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Kernel implementing the AVERAGE_POOL_2D operation for FLOAT32 and U8
// tensors. configure() derives the output shape and padding; execute()
// dispatches to the type-specific TFLite reference implementation.
class AveragePool2D : public KernelWithParams<Pool2DParams>
{
public:
  AveragePool2D(const Tensor *input, Tensor *output, const Pool2DParams &params);

  // Accessors for the kernel's single input and single output tensor.
  const Tensor *input() const { return _inputs[0]; }
  Tensor *output() const { return _outputs[0]; }

  void configure() override;
  void execute() const override;

private:
  void evalFloat() const;
  void evalQuantized() const;

private:
  // Implicit padding amounts (per side), computed in configure().
  int32_t _padding_height{};
  int32_t _padding_width{};
};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_AVERAGEPOOL2D_H
diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp b/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp
new file mode 100644
index 000000000..7160e49e9
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/AveragePool2D.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(AveragePool2DTest, Float)
+{
+ Shape input_shape{1, 3, 5, 1};
+ std::vector<float> input_data{
+ -4, -3, -2, -1, 0, //
+ 1, 2, 3, 4, 5, //
+ 6, 7, 8, 9, 10, //
+ };
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ AveragePool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 0, 1.5, //
+ 4.5, 6, //
+ };
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 1}));
+}
+
// Quantized average pooling with a 2x2 window and stride 2; the negative
// window average is clamped to 0 by RELU6 after dequantization.
TEST(AveragePool2DTest, Uint8_0)
{
  // Symmetric-range parameters chosen so the scale divides the test values
  // exactly (range / 256 = 0.1245...).
  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
  Tensor input_tensor{DataType::U8, {1, 2, 4, 1}, {{quant_param.first}, {quant_param.second}}, ""};
  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);

  std::vector<uint8_t> quant_input = quantize<uint8_t>(
      {
          0, -6, 12, 4, //
          -3, -2, 10, 7, //
      },
      quant_param.first, quant_param.second);
  input_tensor.writeData(quant_input.data(), quant_input.size() * sizeof(uint8_t));

  Pool2DParams params{};
  params.padding = Padding::VALID;
  params.filter_height = 2;
  params.filter_width = 2;
  params.stride_height = 2;
  params.stride_width = 2;
  params.activation = Activation::RELU6;

  AveragePool2D kernel(&input_tensor, &output_tensor, params);
  kernel.configure();
  kernel.execute();

  EXPECT_THAT(dequantize(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(),
                         output_tensor.zero_point()),
              ElementsAreArray(ArrayFloatNear({0.0, 6.0})));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1}));
}
+
// Same configuration as Uint8_0 but with all-positive inputs, so both window
// averages survive the RELU6 clamp.
TEST(AveragePool2DTest, Uint8_1)
{
  std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375f, 15.9375f);
  Tensor input_tensor{DataType::U8, {1, 2, 4, 1}, {{quant_param.first}, {quant_param.second}}, ""};
  Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);

  std::vector<uint8_t> quant_input = quantize<uint8_t>(
      {
          0, 6, 12, 4, //
          3, 2, 10, 7, //
      },
      quant_param.first, quant_param.second);
  input_tensor.writeData(quant_input.data(), quant_input.size() * sizeof(uint8_t));

  Pool2DParams params{};
  params.padding = Padding::VALID;
  params.filter_height = 2;
  params.filter_width = 2;
  params.stride_height = 2;
  params.stride_width = 2;
  params.activation = Activation::RELU6;

  AveragePool2D kernel(&input_tensor, &output_tensor, params);
  kernel.configure();
  kernel.execute();

  EXPECT_THAT(dequantize(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(),
                         output_tensor.zero_point()),
              ElementsAreArray(ArrayFloatNear({2.75, 6.0})));
  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1}));
}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/CMakeLists.txt b/compiler/luci-interpreter/src/kernels/CMakeLists.txt
new file mode 100644
index 000000000..fe3623135
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/CMakeLists.txt
@@ -0,0 +1,106 @@
find_package(Threads REQUIRED)
nnas_find_package(GTest REQUIRED)

# Kernel implementations: one <Name>.h / <Name>.cpp pair per operator.
set(SOURCES
    Add.h
    Add.cpp
    ArgMax.h
    ArgMax.cpp
    AveragePool2D.h
    AveragePool2D.cpp
    Concatenation.h
    Concatenation.cpp
    Conv2D.h
    Conv2D.cpp
    DepthwiseConv2D.h
    DepthwiseConv2D.cpp
    Elu.h
    Elu.cpp
    FullyConnected.h
    FullyConnected.cpp
    If.h
    If.cpp
    L2Normalize.h
    L2Normalize.cpp
    L2Pool2D.h
    L2Pool2D.cpp
    LeakyRelu.h
    LeakyRelu.cpp
    LocalResponseNormalization.h
    LocalResponseNormalization.cpp
    Logistic.h
    Logistic.cpp
    MaxPool2D.h
    MaxPool2D.cpp
    Mean.h
    Mean.cpp
    Mul.h
    Mul.cpp
    Pad.h
    Pad.cpp
    Reshape.h
    Reshape.cpp
    Softmax.h
    Softmax.cpp
    SpaceToDepth.h
    SpaceToDepth.cpp
    Split.h
    Split.cpp
    StridedSlice.h
    StridedSlice.cpp
    Squeeze.h
    Squeeze.cpp
    Transpose.h
    Transpose.cpp
    TransposeConv.h
    TransposeConv.cpp
    Unpack.h
    Unpack.cpp)

# Shared helpers used by all kernels (shape/padding/quantization utilities).
list(APPEND SOURCES Utils.h Utils.cpp)

add_library(luci_interpreter_kernels STATIC ${SOURCES})
set_target_properties(luci_interpreter_kernels PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(luci_interpreter_kernels PUBLIC ${LUCI_INTERPRETER_SOURCE_DIR})
# TFLite reference kernels and their dependencies are header-only includes;
# mark them SYSTEM to silence third-party warnings.
target_include_directories(luci_interpreter_kernels SYSTEM PRIVATE
    "${TensorFlowGEMMLowpSource_DIR}"
    "${TensorFlowEigenSource_DIR}"
    "${TensorFlowSource_DIR}")
target_link_libraries(luci_interpreter_kernels
    PUBLIC luci_interpreter_core
    PRIVATE nncc_common Threads::Threads)


# One <Name>.test.cpp per kernel above.
set(TEST_SOURCES
    Add.test.cpp
    ArgMax.test.cpp
    AveragePool2D.test.cpp
    Concatenation.test.cpp
    Conv2D.test.cpp
    DepthwiseConv2D.test.cpp
    Elu.test.cpp
    FullyConnected.test.cpp
    If.test.cpp
    L2Normalize.test.cpp
    L2Pool2D.test.cpp
    LeakyRelu.test.cpp
    LocalResponseNormalization.test.cpp
    Logistic.test.cpp
    MaxPool2D.test.cpp
    Mean.test.cpp
    Mul.test.cpp
    Pad.test.cpp
    Reshape.test.cpp
    Softmax.test.cpp
    SpaceToDepth.test.cpp
    Split.test.cpp
    StridedSlice.test.cpp
    Squeeze.test.cpp
    Transpose.test.cpp
    TransposeConv.test.cpp
    Unpack.test.cpp)

list(APPEND TEST_SOURCES TestUtils.h TestUtils.cpp)

GTest_AddTest(luci_interpreter_kernels_test ${TEST_SOURCES})
target_link_libraries(luci_interpreter_kernels_test luci_interpreter_kernels)
diff --git a/compiler/luci-interpreter/src/kernels/Concatenation.cpp b/compiler/luci-interpreter/src/kernels/Concatenation.cpp
new file mode 100644
index 000000000..812ab7609
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Concatenation.cpp
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Concatenation.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Registers a variable number of input tensors and one output tensor; the
// concatenation axis travels in 'params'.
Concatenation::Concatenation(std::vector<const Tensor *> inputs, Tensor *output,
                             const ConcatenationParams &params)
    : KernelWithParams<ConcatenationParams>(std::move(inputs), {output}, params)
{
}
+
+void Concatenation::configure()
+{
+ const int num_inputs = _inputs.size();
+ assert(num_inputs > 0);
+ const Tensor *t0 = _inputs[0];
+
+ int axis = _params.axis;
+ if (axis < 0)
+ axis += t0->shape().num_dims();
+ assert(axis >= 0 && axis < t0->shape().num_dims());
+
+ int32_t sum_axis = t0->shape().dim(axis);
+ for (int i = 1; i < num_inputs; ++i)
+ {
+ const Tensor *tensor = _inputs[i];
+ assert(tensor->element_type() == t0->element_type());
+ assert(tensor->shape().num_dims() == t0->shape().num_dims());
+ for (int d = 0; d < t0->shape().num_dims(); ++d)
+ {
+ if (d == axis)
+ {
+ sum_axis += tensor->shape().dim(axis);
+ }
+ else
+ {
+ assert(tensor->shape().dim(d) == t0->shape().dim(d));
+ }
+ }
+ }
+
+ Shape output_shape = t0->shape();
+ output_shape.dim(axis) = sum_axis;
+
+ // TODO S8 type needs more checking: quantization parameters of all input tensors and the output
+ // tensor should be the same. Note that there is no such requirement for U8 type.
+ if (t0->element_type() == DataType::S8)
+ throw std::runtime_error("Unsupported type.");
+
+ output()->resize(output_shape);
+}
+
+void Concatenation::execute() const
+{
+ switch (_inputs[0]->element_type())
+ {
+ case DataType::FLOAT32:
+ evalGeneric<float>();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ case DataType::S8:
+ evalGeneric<int8_t>();
+ break;
+ case DataType::S32:
+ evalGeneric<int32_t>();
+ break;
+ case DataType::S64:
+ evalGeneric<int64_t>();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
// Concatenates the inputs as plain arrays of T, with no quantization
// handling; used for all supported types except U8.
template <typename T> void Concatenation::evalGeneric() const
{
  int axis = _params.axis;
  if (axis < 0)
    axis += output()->shape().num_dims();

  // VectorOfTensors collects per-input shape/data pointers in the layout the
  // TFLite reference kernel expects.
  VectorOfTensors<T, true> inputs(_inputs);
  tflite::ConcatenationParams params{};
  params.axis = axis;
  params.inputs_count = _inputs.size();
  tflite::reference_ops::Concatenation(params, inputs.shapes(), inputs.data(),
                                       getTensorShape(output()), getTensorData<T>(output()));
}
+
// U8 path: ConcatenationWithScaling rescales each input from its own
// quantization parameters to the output's, so inputs may differ in
// scale/zero-point.
void Concatenation::evalQuantized() const
{
  int axis = _params.axis;
  if (axis < 0)
    axis += output()->shape().num_dims();

  VectorOfQuantizedTensors<true> inputs(_inputs);
  tflite::ConcatenationParams params{};
  params.axis = axis;
  params.input_zeropoint = inputs.zero_point();
  params.input_scale = inputs.scale();
  params.inputs_count = _inputs.size();
  params.output_zeropoint = output()->zero_point();
  params.output_scale = output()->scale();

  tflite::reference_ops::ConcatenationWithScaling(params, inputs.shapes(), inputs.data(),
                                                  getTensorShape(output()),
                                                  getTensorData<uint8_t>(output()));
}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Concatenation.h b/compiler/luci-interpreter/src/kernels/Concatenation.h
new file mode 100644
index 000000000..b48c8ed1e
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Concatenation.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_CONCATENATION_H
+#define LUCI_INTERPRETER_KERNELS_CONCATENATION_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Kernel implementing the CONCATENATION operation: joins N input tensors
// along a configurable (possibly negative) axis.
class Concatenation : public KernelWithParams<ConcatenationParams>
{
public:
  Concatenation(std::vector<const Tensor *> inputs, Tensor *output,
                const ConcatenationParams &params);

  // Accessors: the i-th input tensor and the single output tensor.
  const Tensor *input(int index) const { return _inputs[index]; }
  Tensor *output() const { return _outputs[0]; }

  void configure() override;
  void execute() const override;

private:
  template <typename T> void evalGeneric() const;
  void evalQuantized() const;
};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_CONCATENATION_H
diff --git a/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp b/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp
new file mode 100644
index 000000000..d9a7097d0
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Concatenation.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
// Concatenates two 2x3 float tensors along each valid axis, including the
// negative-axis aliases (-2 == 0 and -1 == 1 for rank-2 tensors).
TEST(ConcatenationTest, Float)
{
  std::vector<float> input1_data{1, 2, 3, 4, 5, 6};
  std::vector<float> input2_data{7, 8, 9, 10, 11, 12};
  Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data);
  Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data);
  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
  ConcatenationParams params{};

  // Try different 'axis' and expect different results.
  {
    params.axis = 0;

    Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
    kernel.configure();
    kernel.execute();

    EXPECT_THAT(extractTensorData<float>(output_tensor),
                ElementsAreArray(ArrayFloatNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12})));
  }
  {
    params.axis = -2; // Same as '0'.

    Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
    kernel.configure();
    kernel.execute();

    EXPECT_THAT(extractTensorData<float>(output_tensor),
                ElementsAreArray(ArrayFloatNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12})));
  }
  {
    params.axis = 1;

    Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
    kernel.configure();
    kernel.execute();

    EXPECT_THAT(extractTensorData<float>(output_tensor),
                ElementsAreArray(ArrayFloatNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12})));
  }
  {
    params.axis = -1; // Same as '1'.

    Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params);
    kernel.configure();
    kernel.execute();

    EXPECT_THAT(extractTensorData<float>(output_tensor),
                ElementsAreArray(ArrayFloatNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12})));
  }
}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.cpp
new file mode 100644
index 000000000..60e6134ab
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Conv2D.cpp
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Conv2D.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h>
+
+#include <stdexcept>
+#include <thread>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Registers input, filter and (optionally null) bias tensors plus the single
// output tensor; convolution hyper-parameters travel in 'params'.
Conv2D::Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
               const Conv2DParams &params)
    : KernelWithParams<Conv2DParams>({input, filter, bias}, {output}, params)
{
}
+
// Validates type combinations and shapes, computes output shape and padding,
// and pre-allocates the im2col scratch tensor when the chosen TFLite
// implementation will need one.
void Conv2D::configure()
{
  // TensorFlow Lite (as of v2.2.0) supports the following combinations of types:
  //     | input filter bias  output |
  // ----+---------------------------+
  // (1) | float float  float float  |
  // (2) | float int8   float float  | hybrid
  // (3) | uint8 uint8  int32 uint8  | quantized
  // (4) | int8  int8   int32 int8   | quantized per channel
  //
  // We only support (1) and (3) for now.
  if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32)
  {
    assert(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
  }
  else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8)
  {
    assert(bias() == nullptr || bias()->element_type() == DataType::S32);
  }
  else
  {
    throw std::runtime_error("Unsupported type.");
  }
  assert(output()->element_type() == input()->element_type());

  const Shape &input_shape = input()->shape();
  const Shape &filter_shape = filter()->shape();
  assert(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);

  // Filter layout is [output_depth, height, width, input_depth] (TFLite OHWI).
  const int32_t batches = input_shape.dim(0);
  const int32_t input_height = input_shape.dim(1);
  const int32_t input_width = input_shape.dim(2);
  const int32_t output_depth = filter_shape.dim(0);
  const int32_t filter_height = filter_shape.dim(1);
  const int32_t filter_width = filter_shape.dim(2);
  assert(filter_shape.dim(3) == input_shape.dim(3));

  assert(bias() == nullptr ||
         (bias()->shape().num_dims() == 1 && bias()->shape().dim(0) == output_depth));

  const int32_t output_height =
      computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
                        _params.dilation_height_factor);
  const int32_t output_width =
      computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
                        _params.dilation_width_factor);

  _padding_height = computePadding(_params.stride_height, _params.dilation_height_factor,
                                   input_height, filter_height, output_height);
  _padding_width = computePadding(_params.stride_width, _params.dilation_width_factor, input_width,
                                  filter_width, output_width);

  output()->resize({batches, output_height, output_width, output_depth});

  // Allocate tensor for Im2Col, if needed.
  // The checks here should be aligned with the actual implementation.
  const bool need_dilated_im2col =
      _params.dilation_height_factor != 1 || _params.dilation_width_factor != 1;
  const bool need_non_dilated_im2col = _params.stride_height != 1 || _params.stride_width != 1 ||
                                       filter_height != 1 || filter_width != 1;
  const bool need_im2col = need_dilated_im2col || need_non_dilated_im2col;
  if (need_im2col)
  {
    // NOTE(review): this scratch tensor can be much larger than the input
    // (x filter_height * filter_width) — verify memory use is acceptable.
    const int input_depth = input_shape.dim(3);
    Shape im2col_shape{batches, output_height, output_width,
                       input_depth * filter_height * filter_width};
    _im2col =
        std::make_unique<Tensor>(input()->element_type(), im2col_shape, AffineQuantization{}, "");
  }
}
+
+void Conv2D::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ if (filter()->element_type() == DataType::FLOAT32)
+ {
+ evalFloat();
+ break;
+ }
+ throw std::runtime_error("Unsupported type.");
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
// Runs the TFLite optimized float convolution, using the pre-allocated
// im2col scratch tensor when configure() decided one is needed.
// NOTE(review): bias() may be null — presumably getTensorShape/getTensorData
// tolerate a null tensor; confirm in kernels/Utils.h.
void Conv2D::evalFloat() const
{
  float activation_min{};
  float activation_max{};
  calculateActivationRange(_params.activation, &activation_min, &activation_max);

  tflite::ConvParams params{};
  params.padding_values.height = _padding_height;
  params.padding_values.width = _padding_width;
  params.stride_height = _params.stride_height;
  params.stride_width = _params.stride_width;
  params.dilation_height_factor = _params.dilation_height_factor;
  params.dilation_width_factor = _params.dilation_width_factor;
  params.float_activation_min = activation_min;
  params.float_activation_max = activation_max;

  tflite::optimized_ops::Conv(params, getTensorShape(input()), getTensorData<float>(input()),
                              getTensorShape(filter()), getTensorData<float>(filter()),
                              getTensorShape(bias()), getTensorData<float>(bias()),
                              getTensorShape(output()), getTensorData<float>(output()),
                              getTensorShape(_im2col.get()), getTensorData<float>(_im2col.get()));
}
+
// Runs the TFLite optimized uint8 convolution. The combined rescale factor
// (input_scale * filter_scale / output_scale) is folded into a fixed-point
// multiplier + shift, as required by the quantized kernel.
void Conv2D::evalQuantized() const
{
  const auto input_scale = static_cast<double>(input()->scale());
  const auto filter_scale = static_cast<double>(filter()->scale());
  const auto output_scale = static_cast<double>(output()->scale());

  const double real_multiplier = input_scale * filter_scale / output_scale;
  int32_t output_multiplier{};
  int output_shift{};
  quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);

  int32_t activation_min{};
  int32_t activation_max{};
  calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);

  tflite::ConvParams params{};
  params.padding_values.height = _padding_height;
  params.padding_values.width = _padding_width;
  params.stride_height = _params.stride_height;
  params.stride_width = _params.stride_width;
  params.dilation_height_factor = _params.dilation_height_factor;
  params.dilation_width_factor = _params.dilation_width_factor;
  // The kernel expects input and filter zero points to be negated.
  params.input_offset = -input()->zero_point();    // Note the '-'.
  params.weights_offset = -filter()->zero_point(); // Note the '-'.
  params.output_offset = output()->zero_point();
  params.output_multiplier = output_multiplier;
  params.output_shift = output_shift;
  params.quantized_activation_min = activation_min;
  params.quantized_activation_max = activation_max;

  // TODO This should only be done once (although it takes only a few microseconds).
  // Also, the user should be able to adjust the number of threads.
  auto gemmlowp_context = std::make_unique<gemmlowp::GemmContext>();
  gemmlowp_context->set_max_num_threads(static_cast<int>(std::thread::hardware_concurrency()));

  tflite::optimized_ops::Conv(
      params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()),
      getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
      getTensorShape(output()), getTensorData<uint8_t>(output()), getTensorShape(_im2col.get()),
      getTensorData<uint8_t>(_im2col.get()), gemmlowp_context.get());
}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.h b/compiler/luci-interpreter/src/kernels/Conv2D.h
new file mode 100644
index 000000000..69e309852
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Conv2D.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_CONV2D_H
+#define LUCI_INTERPRETER_KERNELS_CONV2D_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <memory>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Kernel implementing the CONV_2D operation for (float, float) and
// (uint8, uint8) input/filter type combinations. configure() validates the
// operands and may allocate an im2col scratch tensor for execute().
class Conv2D : public KernelWithParams<Conv2DParams>
{
public:
  Conv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
         const Conv2DParams &params);

  // Accessors for the kernel's operands; bias() may be null.
  const Tensor *input() const { return _inputs[0]; }
  const Tensor *filter() const { return _inputs[1]; }
  const Tensor *bias() const { return _inputs[2]; }
  Tensor *output() const { return _outputs[0]; }

  void configure() override;
  void execute() const override;

private:
  void evalFloat() const;
  void evalQuantized() const;

private:
  // Scratch tensor for im2col, allocated in configure() only when needed.
  std::unique_ptr<Tensor> _im2col;
  // Implicit padding amounts (per side), computed in configure().
  int32_t _padding_height{};
  int32_t _padding_width{};
};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_CONV2D_H
diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp
new file mode 100644
index 000000000..ef9ace903
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Conv2D.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(Conv2DTest, Float)
+{
+ Shape input_shape{1, 4, 3, 2};
+ Shape filter_shape{2, 2, 2, 2};
+ Shape bias_shape{2};
+ std::vector<float> input_data{
+ 1, 2, 3, 4, 5, 6, // row = 0
+ 7, 8, 9, 10, 11, 12, // row = 1
+ 13, 14, 15, 16, 17, 18, // row = 2
+ 19, 20, 21, 22, 23, 24, // row = 3
+ };
+ std::vector<float> filter_data{
+ 1, 2, -3, -4, // out = 0, row = 0
+ -5, 6, -7, 8, // out = 1, row = 0
+ 4, -2, 3, -1, // out = 0, row = 1
+ -8, -6, 7, 5, // out = 1, row = 1
+ };
+ std::vector<float> bias_data{1, 2};
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
+ Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 11, 16, 7, 20, // row = 0
+ 0, 40, 0, 44, // row = 1
+ };
+ std::vector<int32_t> ref_output_shape{1, 2, 2, 2};
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST(Conv2DTest, FloatCheck)
+{
+ Shape input_shape{2, 2, 4, 1};
+ Shape filter_shape{3, 2, 2, 1};
+ Shape bias_shape{3};
+ std::vector<float> input_data{
+ // First batch
+ 1, 1, 1, 1, // row = 1
+ 2, 2, 2, 2, // row = 2
+ // Second batch
+ 1, 2, 3, 4, // row = 1
+ 1, 2, 3, 4, // row = 2
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, // first 2x2 filter
+ -1, 1, -1, 1, // second 2x2 filter
+ -1, -1, 1, 1, // third 2x2 filter
+ };
+ std::vector<float> bias_data{1, 2, 3};
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
+ Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::NONE;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 18, 2, 5, // first batch, left
+ 18, 2, 5, // first batch, right
+ 17, 4, 3, // second batch, left
+ 37, 4, 3, // second batch, right
+ };
+ std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST(Conv2DTest, Uint8)
+{
+ std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
+ Shape bias_shape = {3};
+ Tensor input_tensor{
+ DataType::U8, {2, 2, 4, 1}, {{input_quant_param.first}, {input_quant_param.second}}, ""};
+ Tensor filter_tensor{
+ DataType::U8, {3, 2, 2, 1}, {{input_quant_param.first}, {input_quant_param.second}}, ""};
+ Tensor bias_tensor{
+ DataType::S32, bias_shape, {{input_quant_param.first * input_quant_param.first}, {0}}, ""};
+ Tensor output_tensor =
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+ std::vector<uint8_t> quantized_input = quantize<uint8_t>(
+ {
+ // First batch
+ 1, 1, 1, 1, // row = 1
+ 2, 2, 2, 2, // row = 2
+ // Second batch
+ 1, 2, 3, 4, // row = 1
+ 1, 2, 3, 4, // row = 2
+ },
+ input_quant_param.first, input_quant_param.second);
+ std::vector<uint8_t> quantized_filter = quantize<uint8_t>(
+ {
+ 1, 2, 3, 4, // first 2x2 filter
+ -1, 1, -1, 1, // second 2x2 filter
+ -1, -1, 1, 1, // third 2x2 filter
+ },
+ input_quant_param.first, input_quant_param.second);
+ std::vector<int32_t> bias_data =
+ quantize<int32_t>({1, 2, 3}, input_quant_param.first * input_quant_param.first, 0);
+ input_tensor.writeData(quantized_input.data(), quantized_input.size() * sizeof(uint8_t));
+ filter_tensor.writeData(quantized_filter.data(), quantized_filter.size() * sizeof(uint8_t));
+ bias_tensor.writeData(bias_data.data(), bias_data.size() * sizeof(int32_t));
+
+ Conv2DParams params{};
+ params.padding = Padding::VALID;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::NONE;
+
+ Conv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 18, 2, 5, // first batch, left
+ 18, 2, 5, // first batch, right
+ 17, 4, 3, // second batch, left
+ 37, 4, 3, // second batch, right
+ };
+ std::vector<int32_t> ref_output_shape{2, 1, 2, 3};
+ EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor),
+ output_quant_param.first, output_quant_param.second),
+ ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
new file mode 100644
index 000000000..b01a5e086
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/DepthwiseConv2D.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h>
+#include <tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+DepthwiseConv2D::DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias,
+ Tensor *output, const DepthwiseConv2DParams &params)
+ : KernelWithParams<DepthwiseConv2DParams>({input, filter, bias}, {output}, params)
+{
+}
+
+void DepthwiseConv2D::configure()
+{
+ // TensorFlow Lite (as of v2.2.0) supports the following combinations of types:
+ // | input filter bias output |
+ // ----+---------------------------+
+ // (1) | float float float float |
+ // (2) | float int8 float float | hybrid
+ // (3) | uint8 uint8 int32 uint8 | quantized
+ // (4) | int8 int8 int32 int8 | quantized per channel
+ // (5) | int16 int8 int64 int16 | quantized per channel 16x8
+ //
+ // We only support (1) and (3) for now.
+ if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32)
+ {
+ assert(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
+ }
+ else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8)
+ {
+ assert(bias() == nullptr || bias()->element_type() == DataType::S32);
+ }
+ else
+ {
+ throw std::runtime_error("Unsupported type.");
+ }
+ assert(output()->element_type() == input()->element_type());
+
+ const Shape &input_shape = input()->shape();
+ const Shape &filter_shape = filter()->shape();
+ assert(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4);
+
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ // Filter format: [1, H, W, O].
+ assert(filter_shape.dim(0) == 1);
+ const int32_t filter_height = filter_shape.dim(1);
+ const int32_t filter_width = filter_shape.dim(2);
+ const int32_t channels_out = filter_shape.dim(3);
+
+ assert(bias() == nullptr ||
+ (bias()->shape().num_dims() == 1 && bias()->shape().dim(0) == channels_out));
+
+ const int32_t output_height =
+ computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height,
+ _params.dilation_height_factor);
+ const int32_t output_width =
+ computeOutputSize(_params.padding, input_width, filter_width, _params.stride_width,
+ _params.dilation_width_factor);
+
+ _padding_height = computePadding(_params.stride_height, _params.dilation_height_factor,
+ input_height, filter_height, output_height);
+ _padding_width = computePadding(_params.stride_width, _params.dilation_width_factor, input_width,
+ filter_width, output_width);
+
+ output()->resize({batches, output_height, output_width, channels_out});
+}
+
+void DepthwiseConv2D::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ if (filter()->element_type() == DataType::FLOAT32)
+ {
+ evalFloat();
+ break;
+ }
+ throw std::runtime_error("Unsupported type.");
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void DepthwiseConv2D::evalFloat() const
+{
+ float activation_min{};
+ float activation_max{};
+ calculateActivationRange(_params.activation, &activation_min, &activation_max);
+
+ tflite::DepthwiseParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+ params.depth_multiplier = _params.depth_multiplier;
+ params.float_activation_min = activation_min;
+ params.float_activation_max = activation_max;
+
+ tflite::reference_ops::DepthwiseConv(
+ params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
+ getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void DepthwiseConv2D::evalQuantized() const
+{
+ const auto input_scale = static_cast<double>(input()->scale());
+ const auto filter_scale = static_cast<double>(filter()->scale());
+ const auto output_scale = static_cast<double>(output()->scale());
+
+ const double real_multiplier = input_scale * filter_scale / output_scale;
+ int32_t output_multiplier{};
+ int output_shift{};
+ quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
+
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::DepthwiseParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.dilation_height_factor = _params.dilation_height_factor;
+ params.dilation_width_factor = _params.dilation_width_factor;
+ params.depth_multiplier = _params.depth_multiplier;
+ // The kernel expects input and filter zero points to be negated.
+ params.input_offset = -input()->zero_point(); // Note the '-'.
+ params.weights_offset = -filter()->zero_point(); // Note the '-'.
+ params.output_offset = output()->zero_point();
+ params.output_multiplier = output_multiplier;
+ params.output_shift = output_shift;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ tflite::reference_ops::DepthwiseConv(
+ params, getTensorShape(input()), getTensorData<uint8_t>(input()), getTensorShape(filter()),
+ getTensorData<uint8_t>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h
new file mode 100644
index 000000000..62f4bff0e
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_DEPTHWISECONV2D_H
+#define LUCI_INTERPRETER_KERNELS_DEPTHWISECONV2D_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class DepthwiseConv2D : public KernelWithParams<DepthwiseConv2DParams>
+{
+public:
+ DepthwiseConv2D(const Tensor *input, const Tensor *filter, const Tensor *bias, Tensor *output,
+ const DepthwiseConv2DParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *filter() const { return _inputs[1]; }
+ const Tensor *bias() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+
+private:
+ int32_t _padding_height{};
+ int32_t _padding_width{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_DEPTHWISECONV2D_H
diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp
new file mode 100644
index 000000000..a9b43d864
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/DepthwiseConv2D.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(DepthwiseConv2DTest, Float)
+{
+ Shape input_shape{1, 4, 2, 2};
+ Shape filter_shape{1, 2, 2, 4};
+ Shape bias_shape{4};
+ std::vector<float> input_data{
+ 1, 2, 7, 8, //
+ 3, 4, 9, 10, //
+ 5, 6, 11, 12, //
+ 13, 14, 15, 16, //
+ };
+ std::vector<float> filter_data{
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ };
+ std::vector<float> bias_data{1, 2, 3, 4};
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data);
+ Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 2;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::RELU;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 71, 0, 99, 0, //
+ 167, 0, 227, 28, //
+ };
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4}));
+}
+
+TEST(DepthwiseConv2DTest, Uint8)
+{
+ std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64);
+ std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128);
+
+ Tensor input_tensor{
+ DataType::U8, {1, 3, 2, 2}, {{input_quant_param.first}, {input_quant_param.second}}, ""};
+ Tensor filter_tensor{
+ DataType::U8, {1, 2, 2, 4}, {{input_quant_param.first}, {input_quant_param.second}}, ""};
+ Tensor bias_tensor{
+ DataType::S32, {4}, {{input_quant_param.first * input_quant_param.first}, {0}}, ""};
+ Tensor output_tensor =
+ makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second);
+
+ std::vector<uint8_t> quant_input = quantize<uint8_t>(
+ {
+ 1, 2, 7, 8, // column 1
+ 3, 4, 9, 10, // column 2
+ 5, 6, 11, 12, // column 3
+ },
+ input_quant_param.first, input_quant_param.second);
+ std::vector<uint8_t> quant_filter = quantize<uint8_t>(
+ {
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ },
+ input_quant_param.first, input_quant_param.second);
+ std::vector<int32_t> quant_bias =
+ quantize<int32_t>({1, 2, 3, 4}, input_quant_param.first * input_quant_param.first, 0);
+
+ input_tensor.writeData(quant_input.data(), quant_input.size() * sizeof(uint8_t));
+ filter_tensor.writeData(quant_filter.data(), quant_filter.size() * sizeof(uint8_t));
+ bias_tensor.writeData(quant_bias.data(), quant_bias.size() * sizeof(int32_t));
+
+ DepthwiseConv2DParams params{};
+ params.padding = Padding::VALID;
+ params.depth_multiplier = 2;
+ params.stride_height = 1;
+ params.stride_width = 1;
+ params.dilation_height_factor = 1;
+ params.dilation_width_factor = 1;
+ params.activation = Activation::NONE;
+
+ DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 71, -34, 99, -20, //
+ 91, -26, 127, -4, //
+ };
+ EXPECT_THAT(dequantize(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(),
+ output_tensor.zero_point()),
+ ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4}));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Elu.cpp b/compiler/luci-interpreter/src/kernels/Elu.cpp
new file mode 100644
index 000000000..5de4a1f3b
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Elu.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Elu.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Elu::Elu(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Elu::configure()
+{
+ assert(input()->element_type() == output()->element_type());
+ output()->resize(input()->shape());
+}
+
+void Elu::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::optimized_ops::Elu(getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Elu.h b/compiler/luci-interpreter/src/kernels/Elu.h
new file mode 100644
index 000000000..c844ab57f
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Elu.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_ELU_H
+#define LUCI_INTERPRETER_KERNELS_ELU_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Elu : public Kernel
+{
+public:
+ Elu(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_ELU_H
diff --git a/compiler/luci-interpreter/src/kernels/Elu.test.cpp b/compiler/luci-interpreter/src/kernels/Elu.test.cpp
new file mode 100644
index 000000000..52444cbea
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Elu.test.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Elu.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<float> input_data, std::initializer_list<float> output_data)
+{
+ Tensor input_tensor{DataType::FLOAT32, input_shape, {}, ""};
+ input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(float));
+
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Elu kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ (void)output_shape;
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ArrayFloatNear(output_data)));
+}
+
+TEST(EluTest, SimpleElu)
+{
+ Check(
+ /*input_shape=*/{1, 2, 4, 1}, /*output_shape=*/{1, 2, 4, 1},
+ /*input_data=*/
+ {
+ 0, -6, 2, -4, //
+ 3, -2, 10, -0.1, //
+ },
+ /*output_data=*/
+ {
+ 0.0, -0.997521, 2.0, -0.981684, //
+ 3.0, -0.864665, 10.0, -0.0951626, //
+ });
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/FullyConnected.cpp b/compiler/luci-interpreter/src/kernels/FullyConnected.cpp
new file mode 100644
index 000000000..6529c5e77
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/FullyConnected.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/FullyConnected.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/fully_connected.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+FullyConnected::FullyConnected(const Tensor *input, const Tensor *weights, const Tensor *bias,
+ Tensor *output, const FullyConnectedParams &params)
+ : KernelWithParams<FullyConnectedParams>({input, weights, bias}, {output}, params)
+{
+}
+
+void FullyConnected::configure()
+{
+ if (weights()->element_type() != DataType::FLOAT32)
+ throw std::runtime_error("Unsupported type.");
+
+ assert(input()->element_type() == DataType::FLOAT32);
+ assert(weights()->element_type() == DataType::FLOAT32);
+ assert(bias() == nullptr || bias()->element_type() == DataType::FLOAT32);
+
+ const Shape &input_shape = input()->shape();
+ const Shape &weights_shape = weights()->shape();
+
+ assert(weights_shape.num_dims() == 2);
+ assert(bias() == nullptr || bias()->shape().num_elements() == weights_shape.dim(0));
+
+ assert(input_shape.num_elements() % weights_shape.dim(1) == 0);
+ const int32_t batch_size = input_shape.num_elements() / weights_shape.dim(1);
+ const int32_t num_units = weights_shape.dim(0);
+
+ output()->resize({batch_size, num_units});
+}
+
+void FullyConnected::execute() const { evalFloat(); }
+
+void FullyConnected::evalFloat() const
+{
+ float activation_min{};
+ float activation_max{};
+ calculateActivationRange(_params.activation, &activation_min, &activation_max);
+
+ tflite::FullyConnectedParams params{};
+ params.float_activation_min = activation_min;
+ params.float_activation_max = activation_max;
+ params.weights_format = tflite::FullyConnectedWeightsFormat::kDefault;
+
+ tflite::reference_ops::FullyConnected(
+ params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(weights()),
+ getTensorData<float>(weights()), getTensorShape(bias()), getTensorData<float>(bias()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/FullyConnected.h b/compiler/luci-interpreter/src/kernels/FullyConnected.h
new file mode 100644
index 000000000..2e3174c74
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/FullyConnected.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_FULLYCONNECTED_H
+#define LUCI_INTERPRETER_KERNELS_FULLYCONNECTED_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class FullyConnected : public KernelWithParams<FullyConnectedParams>
+{
+public:
+ FullyConnected(const Tensor *input, const Tensor *weights, const Tensor *bias, Tensor *output,
+ const FullyConnectedParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *weights() const { return _inputs[1]; }
+ const Tensor *bias() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_FULLYCONNECTED_H
diff --git a/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp b/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp
new file mode 100644
index 000000000..8077fcb5c
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/FullyConnected.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(FullyConnectedTest, Float)
+{
+ Shape input_shape{3, 2, 2, 1};
+ std::vector<float> input_data{
+ -3, -5, 5, 4, 9, -2, // batch = 0
+ -3, -2, -4, 9, -8, 1, // batch = 1
+ };
+ Shape weights_shape{3, 6};
+ std::vector<float> weights_data{
+ -3, -7, 4, -4, -6, 4, // unit = 0
+ 3, 5, 2, 3, -3, -8, // unit = 1
+ -3, 7, 4, 9, 0, -5, // unit = 2
+ };
+ Shape bias_shape{3};
+ std::vector<float> bias_data{-1, -5, -8};
+
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor weights_tensor = makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data);
+ Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ FullyConnectedParams params{};
+ params.activation = Activation::RELU;
+
+ FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 0, 0, 32, // batch = 0
+ 22, 11, 47, // batch = 1
+ };
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ElementsAreArray(ArrayFloatNear(ref_output_data)));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/If.cpp b/compiler/luci-interpreter/src/kernels/If.cpp
new file mode 100644
index 000000000..e6bdee338
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/If.cpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/If.h"
+
+#include <cstring>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Builds the kernel's input list: the condition tensor first, then all data inputs.
+static std::vector<const Tensor *> joinInputs(const Tensor *cond,
+                                              const std::vector<const Tensor *> &inputs)
+{
+  std::vector<const Tensor *> joined;
+  joined.reserve(inputs.size() + 1);
+  joined.push_back(cond);
+  for (const Tensor *tensor : inputs)
+    joined.push_back(tensor);
+  return joined;
+}
+
+// The condition tensor is stored as _inputs[0] (see joinInputs), followed by the
+// data inputs. The branch graphs are not owned by the kernel.
+If::If(const Tensor *cond, const std::vector<const Tensor *> &inputs, std::vector<Tensor *> outputs,
+       RuntimeGraph *then_graph, RuntimeGraph *else_graph)
+  : Kernel(joinInputs(cond, inputs), std::move(outputs)), _then_graph(then_graph),
+    _else_graph(else_graph)
+{
+}
+
+void If::configure()
+{
+  // The condition must be a single boolean value.
+  assert(cond()->element_type() == DataType::BOOL);
+  assert(cond()->shape().num_elements() == 1);
+
+  // Both branches must accept exactly the kernel's data inputs (all inputs minus
+  // the condition) and produce exactly the kernel's outputs.
+  for (RuntimeGraph *graph : {_then_graph, _else_graph})
+  {
+    (void)graph; // Silences 'unused variable' in NDEBUG builds where asserts vanish.
+    assert(graph->getInputTensors().size() == getInputTensors().size() - 1);
+    assert(graph->getOutputTensors().size() == getOutputTensors().size());
+  }
+}
+
+void If::execute() const
+{
+  const bool cond_value = cond()->data<bool>()[0];
+
+  // Pick the branch to run based on the condition value.
+  RuntimeGraph *active_graph = cond_value ? _then_graph : _else_graph;
+  const auto &graph_inputs = active_graph->getInputTensors();
+  const auto &graph_outputs = active_graph->getOutputTensors();
+
+  // Copy kernel inputs to active graph inputs. input(i) reads _inputs[1 + i],
+  // so the condition tensor is skipped.
+  for (size_t i = 0; i < getInputTensors().size() - 1; ++i)
+  {
+    assert(graph_inputs[i]->element_type() == input(i)->element_type());
+    // Propagate the caller's (possibly runtime-determined) shape into the subgraph.
+    graph_inputs[i]->resize(input(i)->shape());
+
+    const int32_t num_elements = input(i)->shape().num_elements();
+    const std::size_t element_size = getDataTypeSize(input(i)->element_type());
+    std::memcpy(graph_inputs[i]->data<void>(), input(i)->data<void>(), num_elements * element_size);
+  }
+
+  active_graph->execute();
+
+  // Copy graph outputs back to kernel outputs, mirroring the input copy above.
+  for (size_t i = 0; i < getOutputTensors().size(); ++i)
+  {
+    assert(graph_outputs[i]->element_type() == output(i)->element_type());
+    output(i)->resize(graph_outputs[i]->shape());
+
+    const int32_t num_elements = output(i)->shape().num_elements();
+    const std::size_t element_size = getDataTypeSize(output(i)->element_type());
+    std::memcpy(output(i)->data<void>(), graph_outputs[i]->data<void>(),
+                num_elements * element_size);
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/If.h b/compiler/luci-interpreter/src/kernels/If.h
new file mode 100644
index 000000000..fa6ab371a
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/If.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_IF_H
+#define LUCI_INTERPRETER_KERNELS_IF_H
+
+#include "core/Kernel.h"
+#include "core/RuntimeGraph.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel implementing the IF operation: evaluates a scalar boolean condition and
+// runs either the 'then' or the 'else' subgraph on the remaining inputs.
+class If : public Kernel
+{
+public:
+  // 'inputs' holds only the data inputs; the condition is stored internally as
+  // input 0 (see the accessors below).
+  If(const Tensor *cond, const std::vector<const Tensor *> &inputs, std::vector<Tensor *> outputs,
+     RuntimeGraph *then_graph, RuntimeGraph *else_graph);
+
+  const Tensor *cond() const { return _inputs[0]; }
+  // Data inputs are offset by one to skip the condition tensor.
+  const Tensor *input(int index) const { return _inputs[1 + index]; }
+  Tensor *output(int index) const { return _outputs[index]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  // Branch subgraphs; not owned by the kernel (created elsewhere, presumably by
+  // the enclosing RuntimeModule - see the tests).
+  RuntimeGraph *const _then_graph;
+  RuntimeGraph *const _else_graph;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_IF_H
diff --git a/compiler/luci-interpreter/src/kernels/If.test.cpp b/compiler/luci-interpreter/src/kernels/If.test.cpp
new file mode 100644
index 000000000..9b3857ce3
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/If.test.cpp
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "core/RuntimeModule.h"
+#include "kernels/Add.h"
+#include "kernels/If.h"
+#include "kernels/Mul.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Builds a one-operator subgraph: output = <KernelT>(input1, input2) with no
+// fused activation. Tensor shapes are left empty because the If kernel resizes
+// the graph inputs when it executes. Shared by the two builders below, which
+// previously duplicated this body verbatim.
+template <typename KernelT, typename ParamsT>
+RuntimeGraph *buildBinaryOpSubgraph(RuntimeModule *module)
+{
+  RuntimeGraph *graph = module->addGraph();
+  Tensor *input1 = graph->addTensor(
+    std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+  Tensor *input2 = graph->addTensor(
+    std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+  Tensor *output = graph->addTensor(
+    std::make_unique<Tensor>(DataType::FLOAT32, Shape{}, AffineQuantization{}, ""));
+
+  graph->setInputTensors({input1, input2});
+  graph->setOutputTensors({output});
+
+  ParamsT params{};
+  params.activation = Activation::NONE;
+  graph->addKernel(std::make_unique<KernelT>(input1, input2, output, params));
+
+  return graph;
+}
+
+// 'then' branch used by the tests: element-wise addition.
+RuntimeGraph *buildAddSubgraph(RuntimeModule *module)
+{
+  return buildBinaryOpSubgraph<Add, AddParams>(module);
+}
+
+// 'else' branch used by the tests: element-wise multiplication.
+RuntimeGraph *buildMulSubgraph(RuntimeModule *module)
+{
+  return buildBinaryOpSubgraph<Mul, MulParams>(module);
+}
+
+// cond == true must route execution through the 'then' (Add) subgraph:
+// {5, 7} + {1, 2} == {6, 9}.
+TEST(IfTest, CondTrue)
+{
+  Tensor cond = makeInputTensor<DataType::BOOL>({1}, {true});
+  Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7});
+  Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2});
+  Tensor output = makeOutputTensor(DataType::FLOAT32);
+
+  // The module only acts as owner of the subgraphs here; the nullptr argument is
+  // presumably the event notifier - not needed for this test.
+  RuntimeModule module(nullptr);
+  RuntimeGraph *then_graph = buildAddSubgraph(&module);
+  RuntimeGraph *else_graph = buildMulSubgraph(&module);
+
+  If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
+  kernel.configure();
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output), ElementsAreArray(ArrayFloatNear({6, 9})));
+}
+
+// cond == false must route execution through the 'else' (Mul) subgraph:
+// {5, 7} * {1, 2} == {5, 14}.
+TEST(IfTest, CondFalse)
+{
+  Tensor cond = makeInputTensor<DataType::BOOL>({1}, {false});
+  Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7});
+  Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2});
+  Tensor output = makeOutputTensor(DataType::FLOAT32);
+
+  // The module only acts as owner of the subgraphs (see CondTrue above for the
+  // same setup).
+  RuntimeModule module(nullptr);
+  RuntimeGraph *then_graph = buildAddSubgraph(&module);
+  RuntimeGraph *else_graph = buildMulSubgraph(&module);
+
+  If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph);
+  kernel.configure();
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output), ElementsAreArray(ArrayFloatNear({5, 14})));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/L2Normalize.cpp b/compiler/luci-interpreter/src/kernels/L2Normalize.cpp
new file mode 100644
index 000000000..cfa535075
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/L2Normalize.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/L2Normalize.h"
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Normalizes the input by its L2 norm (delegated to TFLite's L2Normalization;
+// see eval()). Supports float and uint8 tensors.
+L2Normalize::L2Normalize(const Tensor *input, Tensor *output, const L2NormParams &params)
+  : KernelWithParams<L2NormParams>({input}, {output}, params)
+{
+}
+
+void L2Normalize::configure()
+{
+  assert(input()->shape().num_dims() <= 4);
+  assert(output()->element_type() == DataType::FLOAT32 || output()->element_type() == DataType::U8);
+  assert(input()->element_type() == output()->element_type());
+  if (output()->element_type() == DataType::U8)
+  {
+    // L2-normalized values lie in [-1, 1], so the quantized output must use the
+    // fixed quantization scale = 1/128, zero point = 128.
+    assert(output()->scale() == (1. / 128.));
+    assert(output()->zero_point() == 128);
+  }
+  // Fused activations are not supported by this kernel.
+  assert(params().activation == Activation::NONE);
+  // Element-wise rescaling: output shape equals input shape.
+  output()->resize(input()->shape());
+}
+
+void L2Normalize::execute() const
+{
+  switch (output()->element_type())
+  {
+    case DataType::FLOAT32:
+      // Zero point is irrelevant for float data.
+      eval<float>(0);
+      break;
+    case DataType::U8:
+      eval<uint8_t>(input()->zero_point());
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Delegates to the TFLite optimized L2Normalization implementation.
+// 'zero_point' is the input offset consumed by the quantized (uint8) path.
+template <typename T> void L2Normalize::eval(int32_t zero_point) const
+{
+  tflite::L2NormalizationParams op_params{};
+  op_params.input_zero_point = zero_point;
+  tflite::optimized_ops::L2Normalization(op_params, getTensorShape(input()),
+                                         getTensorData<T>(input()), getTensorShape(output()),
+                                         getTensorData<T>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/L2Normalize.h b/compiler/luci-interpreter/src/kernels/L2Normalize.h
new file mode 100644
index 000000000..6c7dac698
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/L2Normalize.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_L2NORMALIZE_H
+#define LUCI_INTERPRETER_KERNELS_L2NORMALIZE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel normalizing the input tensor by its L2 norm (float or uint8 data).
+class L2Normalize : public KernelWithParams<L2NormParams>
+{
+public:
+  L2Normalize(const Tensor *input, Tensor *output, const L2NormParams &params);
+
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  // Type-dispatched implementation; 'zero_point' is the input offset used by the
+  // quantized path (0 for float).
+  template <typename T> void eval(int32_t zero_point) const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_L2NORMALIZE_H
diff --git a/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp b/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp
new file mode 100644
index 000000000..fad450d66
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "kernels/L2Normalize.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Float case: the L2 norm of the input row is sqrt(1.21 + 0.36 + 0.49 + 1.44 +
+// 0.49 + 0.01) = sqrt(4) = 2, so each expected output is the input divided by 2.
+TEST(L2NormalizeTest, Float)
+{
+  std::vector<float> input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1};
+
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, input_data);
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  L2NormParams params{};
+  params.activation = Activation::NONE;
+
+  L2Normalize kernel(&input_tensor, &output_tensor, params);
+  kernel.configure();
+  kernel.execute();
+
+  std::vector<float> ref_output_data{-0.55, 0.3, 0.35, 0.6, -0.35, 0.05};
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ElementsAreArray(ArrayFloatNear(ref_output_data)));
+}
+
+// Placeholder for the quantized (DataType::U8) path of L2Normalize.
+TEST(L2NormalizeTest, Uint8Quantized)
+{
+  // TODO Implement a dequantization helper (GetDequantizedOutput) in TestUtils,
+  // then verify the uint8 output against the float reference here.
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/L2Pool2D.cpp b/compiler/luci-interpreter/src/kernels/L2Pool2D.cpp
new file mode 100644
index 000000000..37a6ddedc
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/L2Pool2D.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/L2Pool2D.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Pooling kernel computing the L2 norm of each window, i.e. sqrt of the mean of
+// squared values (e.g. window {0, 6, 3, 2} -> sqrt(49/4) = 3.5; see the tests).
+L2Pool2D::L2Pool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
+  : KernelWithParams<Pool2DParams>({input}, {output}, params)
+{
+}
+
+void L2Pool2D::configure()
+{
+  assert(input()->shape().num_dims() == 4);
+  assert(input()->element_type() == output()->element_type());
+
+  // Input layout is NHWC.
+  int batches = input()->shape().dim(0);
+  int height = input()->shape().dim(1);
+  int width = input()->shape().dim(2);
+  int channels_out = input()->shape().dim(3);
+
+  // Matching GetWindowedOutputSize in TensorFlow.
+  // The literal 1 arguments below are presumably the dilation rate, which is
+  // always 1 for pooling - confirm against kernels/Utils.h.
+  auto padding = params().padding;
+  int out_width, out_height;
+  out_width = computeOutputSize(padding, width, params().filter_width, params().stride_width, 1);
+  out_height =
+    computeOutputSize(padding, height, params().filter_height, params().stride_height, 1);
+  // Cache padding amounts for execute().
+  _padding_width =
+    computePadding(params().stride_width, 1, width, params().filter_width, out_width);
+  _padding_height =
+    computePadding(params().stride_height, 1, height, params().filter_height, out_height);
+
+  // Only the float path is implemented (see execute()).
+  assert(input()->element_type() == DataType::FLOAT32);
+  output()->resize({batches, out_height, out_width, channels_out});
+}
+
+void L2Pool2D::execute() const
+{
+  // Dispatches on element type; only FLOAT32 is supported (enforced in configure()).
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+    {
+      // Braces scope the local declarations to this case. Without them the
+      // declarations extend to the other labels, so jumping to 'default:' would
+      // cross them - fragile, and ill-formed as soon as any of them gains a
+      // non-trivial initializer.
+      float activation_min, activation_max;
+      calculateActivationRange(params().activation, &activation_min, &activation_max);
+      // Value-initialize like the other kernels do, so fields not set below are
+      // zeroed instead of indeterminate.
+      tflite::PoolParams op_params{};
+      op_params.stride_height = params().stride_height;
+      op_params.stride_width = params().stride_width;
+      op_params.filter_height = params().filter_height;
+      op_params.filter_width = params().filter_width;
+      op_params.padding_values.height = _padding_height;
+      op_params.padding_values.width = _padding_width;
+      op_params.float_activation_min = activation_min;
+      op_params.float_activation_max = activation_max;
+      tflite::optimized_ops::L2Pool(op_params, getTensorShape(input()),
+                                    getTensorData<float>(input()), getTensorShape(output()),
+                                    getTensorData<float>(output()));
+      break;
+    }
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/L2Pool2D.h b/compiler/luci-interpreter/src/kernels/L2Pool2D.h
new file mode 100644
index 000000000..d40f5f478
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/L2Pool2D.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_L2POOL2D_H
+#define LUCI_INTERPRETER_KERNELS_L2POOL2D_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <memory>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel computing L2 pooling (square root of the mean of squares inside each
+// pooling window) over a 4-D NHWC float tensor.
+class L2Pool2D : public KernelWithParams<Pool2DParams>
+{
+public:
+  L2Pool2D(const Tensor *input, Tensor *output, const Pool2DParams &params);
+
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  // Padding amounts computed in configure() and consumed in execute().
+  int32_t _padding_height = 0;
+  int32_t _padding_width = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_L2POOL2D_H
diff --git a/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp b/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp
new file mode 100644
index 000000000..06bb9388f
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/L2Pool2D.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Common driver for the float L2Pool2D tests below. Every case uses a
+// {1, 2, 4, 1} input, a 2x2 filter and equal horizontal/vertical stride; only
+// the input values, padding mode, fused activation and stride differ, so the
+// previously copy-pasted ~20-line bodies are factored out here.
+void checkL2Pool2D(const std::vector<float> &input_data, Padding padding, Activation activation,
+                   int32_t stride, const std::vector<float> &ref_output_data)
+{
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 2, 4, 1}, input_data);
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  Pool2DParams params{};
+  params.padding = padding;
+  params.activation = activation;
+  params.filter_height = 2;
+  params.filter_width = 2;
+  params.stride_height = stride;
+  params.stride_width = stride;
+
+  L2Pool2D kernel(&input_tensor, &output_tensor, params);
+  kernel.configure();
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ElementsAreArray(ArrayFloatNear(ref_output_data)));
+  // TODO make a Shape checking of output_tensor.
+}
+
+TEST(L2Pool2DTest, FloatNone)
+{
+  checkL2Pool2D({0, 6, 2, 4, 3, 2, 10, 7}, Padding::VALID, Activation::NONE, /*stride=*/2,
+                {3.5, 6.5});
+}
+
+TEST(L2Pool2DTest, FloatRelu)
+{
+  checkL2Pool2D({-1, -6, 2, 4, -3, -2, 10, 7}, Padding::VALID, Activation::RELU, /*stride=*/2,
+                {3.53553, 6.5});
+}
+
+TEST(L2Pool2DTest, FloatRelu1)
+{
+  checkL2Pool2D({-0.1, -0.6, 2, 4, -0.3, -0.2, 10, 7}, Padding::VALID, Activation::RELU_N1_TO_1,
+                /*stride=*/2, {0.353553, 1.0});
+}
+
+TEST(L2Pool2DTest, FloatRelu6)
+{
+  checkL2Pool2D({-0.1, -0.6, 2, 4, -0.3, -0.2, 10, 7}, Padding::VALID, Activation::RELU6,
+                /*stride=*/2, {0.353553, 6.0});
+}
+
+TEST(L2Pool2DTest, FloatPaddingSame)
+{
+  checkL2Pool2D({0, 6, 2, 4, 3, 2, 10, 7}, Padding::SAME, Activation::NONE, /*stride=*/2,
+                {3.5, 6.5});
+}
+
+TEST(L2Pool2DTest, FloatPaddingSameSlide1)
+{
+  checkL2Pool2D({0, 6, 2, 4, 3, 2, 10, 7}, Padding::SAME, Activation::NONE, /*stride=*/1,
+                {3.5, 6.0, 6.5, 5.70088, 2.54951, 7.2111, 8.63134, 7.0});
+}
+
+TEST(L2Pool2DTest, FloatPaddingValidSlide1)
+{
+  checkL2Pool2D({0, 6, 2, 4, 3, 2, 10, 7}, Padding::VALID, Activation::NONE, /*stride=*/1,
+                {3.5, 6.0, 6.5});
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp b/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp
new file mode 100644
index 000000000..fce01a605
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LeakyRelu.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// LeakyRelu: y = x for x > 0, y = alpha * x otherwise (see the float test:
+// alpha = 0.5 maps -1 to -0.5). Supports float and uint8 tensors.
+LeakyRelu::LeakyRelu(const Tensor *input, Tensor *output, const LeakyReluParams &params)
+  : KernelWithParams<LeakyReluParams>({input}, {output}, params)
+{
+}
+
+void LeakyRelu::configure()
+{
+  assert(input()->element_type() == output()->element_type());
+  if (input()->element_type() == DataType::U8)
+  {
+    // Quantize 'alpha' into the input's quantization space (zero_point + alpha /
+    // scale), rounded and clamped to the valid uint8 range.
+    _q_alpha = static_cast<uint8_t>(std::max<float>(
+      std::numeric_limits<uint8_t>::min(),
+      std::min<float>(std::numeric_limits<uint8_t>::max(),
+                      std::round(input()->zero_point() + (params().alpha / input()->scale())))));
+    // Fixed-point multiplier/shift rescaling the (input * quantized alpha)
+    // product - hence scale^2 - into the output's quantization space.
+    double real_multiplier = input()->scale() * input()->scale() / output()->scale();
+    quantizeMultiplierSmallerThanOneExp(real_multiplier, &_output_multiplier, &_output_shift);
+  }
+  // Element-wise op: output shape matches input shape.
+  output()->resize(input()->shape());
+}
+
+void LeakyRelu::execute() const
+{
+  // Dispatch on element type; the quantized path uses values precomputed in
+  // configure().
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Float path: delegates directly to the TFLite optimized LeakyRelu.
+void LeakyRelu::evalFloat() const
+{
+  tflite::LeakyReluParams op_params{};
+  op_params.alpha = params().alpha;
+  tflite::optimized_ops::LeakyRelu(op_params, getTensorShape(input()),
+                                   getTensorData<float>(input()), getTensorShape(output()),
+                                   getTensorData<float>(output()));
+}
+
+// Quantized (uint8) path: uses the alpha value quantized in configure().
+// alpha_offset equals the input offset because _q_alpha was quantized with the
+// input's scale and zero point.
+void LeakyRelu::evalQuantized() const
+{
+  tflite::LeakyReluParams op_params{};
+  op_params.input_offset = input()->zero_point();
+  op_params.alpha_offset = input()->zero_point();
+  op_params.output_offset = output()->zero_point();
+
+  op_params.output_multiplier = _output_multiplier;
+  op_params.output_shift = _output_shift;
+
+  tflite::reference_ops::QuantizeLeakyRelu(
+    op_params, _q_alpha, getTensorShape(input()), getTensorData<uint8_t>(input()),
+    getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/LeakyRelu.h b/compiler/luci-interpreter/src/kernels/LeakyRelu.h
new file mode 100644
index 000000000..dcc2be93f
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/LeakyRelu.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LEAKYRELU_H
+#define LUCI_INTERPRETER_KERNELS_LEAKYRELU_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// Kernel applying the leaky-ReLU activation element-wise (float or uint8 data).
+class LeakyRelu : public KernelWithParams<LeakyReluParams>
+{
+public:
+  LeakyRelu(const Tensor *input, Tensor *output, const LeakyReluParams &params);
+
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+  void evalQuantized() const;
+
+private:
+  // Precomputed in configure() for the U8 path:
+  uint8_t _q_alpha = 0;           // 'alpha' quantized with the input's scale/zero point
+  int32_t _output_multiplier = 0; // fixed-point output rescale multiplier
+  int _output_shift = 0;          // shift accompanying _output_multiplier
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LEAKYRELU_H
diff --git a/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp b/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp
new file mode 100644
index 000000000..b0c06e7a3
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LeakyRelu.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+ std::initializer_list<T> input_data, std::initializer_list<T> output_data, float alpha,
+ DataType element_type)
+{
+ Tensor input_tensor{element_type, input_shape, {}, ""};
+ input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(T));
+
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ LeakyReluParams params{};
+ params.alpha = alpha;
+
+ LeakyRelu kernel(&input_tensor, &output_tensor, params);
+
+ kernel.configure();
+ kernel.execute();
+
+ (void)output_shape;
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+}
+
+TEST(LeakReluTest, FloatSimple)
+{
+ Check<float>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3}, /*input_data=*/
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -1.0f, -2.0f, // Row 2
+ },
+ /*output_data=*/
+ {
+ 0.0f, 1.0f, 3.0f, // Row 1
+ 1.0f, -0.5f, -1.0f, // Row 2
+ },
+ /*alpha=*/0.5f, getElementType<float>());
+}
+
+TEST(LeakReluTest, Uint8Simple)
+{
+  // TODO: implement a GetDequantizedOutput helper, then add a uint8
+  // test case here that quantizes the input, runs the kernel, and
+  // compares the dequantized output against float reference values.
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp b/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp
new file mode 100644
index 000000000..08efa1d6a
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LocalResponseNormalization.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+LocalResponseNormalization::LocalResponseNormalization(
+ const Tensor *input, Tensor *output, const LocalResponseNormalizationParams &params)
+ : KernelWithParams<LocalResponseNormalizationParams>({input}, {output}, params)
+{
+}
+
+void LocalResponseNormalization::configure()
+{
+ assert(input()->shape().num_dims() == 4);
+ assert(output()->element_type() == DataType::FLOAT32);
+ assert(input()->element_type() == output()->element_type());
+ output()->resize(input()->shape());
+}
+
+void LocalResponseNormalization::execute() const
+{
+ switch (output()->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::LocalResponseNormalizationParams op_params;
+ op_params.range = params().radius;
+ op_params.bias = params().bias;
+ op_params.alpha = params().alpha;
+ op_params.beta = params().beta;
+ tflite::optimized_ops::LocalResponseNormalization(
+ op_params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.h b/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.h
new file mode 100644
index 000000000..60408a104
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOCALRESPONSENORMALIZATION_H
+#define LUCI_INTERPRETER_KERNELS_LOCALRESPONSENORMALIZATION_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LocalResponseNormalization : public KernelWithParams<LocalResponseNormalizationParams>
+{
+public:
+ LocalResponseNormalization(const Tensor *input, Tensor *output,
+ const LocalResponseNormalizationParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOCALRESPONSENORMALIZATION_H
diff --git a/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp b/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp
new file mode 100644
index 000000000..4191bdb29
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/LocalResponseNormalization.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(LocalResponseNormalizationTest, SameAsL2Norm)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 20;
+ params.bias = 0.0;
+ params.alpha = 1.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ElementsAreArray(ArrayFloatNear({-0.55, 0.3, 0.35, 0.6, -0.35, 0.05})));
+}
+
+TEST(LocalResponseNormalizationTest, WithAlpha)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 20;
+ params.bias = 0.0;
+ params.alpha = 4.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ElementsAreArray(ArrayFloatNear({-0.275, 0.15, 0.175, 0.3, -0.175, 0.025})));
+}
+
+TEST(LocalResponseNormalizationTest, WithBias)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 20;
+ params.bias = 9.0;
+ params.alpha = 4.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ElementsAreArray(ArrayFloatNear({-0.22, 0.12, 0.14, 0.24, -0.14, 0.02})));
+}
+
+TEST(LocalResponseNormalizationTest, SmallRadius)
+{
+ Tensor input_tensor =
+ makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1});
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = 2;
+ params.bias = 9.0;
+ params.alpha = 4.0;
+ params.beta = 0.5;
+
+ LocalResponseNormalization kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ElementsAreArray(
+ ArrayFloatNear({-0.264926, 0.125109, 0.140112, 0.267261, -0.161788, 0.0244266})));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Logistic.cpp b/compiler/luci-interpreter/src/kernels/Logistic.cpp
new file mode 100644
index 000000000..c7d45615c
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Logistic.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Logistic.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+Logistic::Logistic(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {}
+
+void Logistic::configure()
+{
+ assert(input()->element_type() == output()->element_type());
+ if (input()->element_type() == DataType::U8)
+ {
+ assert(output()->scale() == 1. / 256);
+ populateLookupTable();
+ }
+ output()->resize(input()->shape());
+}
+
+void Logistic::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Logistic::evalFloat() const
+{
+ tflite::reference_ops::Logistic(getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void Logistic::evalQuantized() const
+{
+ const int size = tflite::MatchingFlatSize(getTensorShape(input()), getTensorShape(output()));
+ uint8_t *output_data = getTensorData<uint8_t>(output());
+ const uint8_t *input_data = getTensorData<uint8_t>(input());
+ for (int i = 0; i < size; ++i)
+ {
+ output_data[i] = getTableValue(input_data[i]);
+ }
+}
+
+void Logistic::populateLookupTable()
+{
+ const auto input_scale = static_cast<double>(input()->scale());
+ const auto input_zero_point = static_cast<int32_t>(input()->zero_point());
+ const auto output_scale = static_cast<double>(output()->scale());
+ const auto output_zero_point = static_cast<int32_t>(output()->zero_point());
+ const float inverse_scale = 1 / output_scale;
+ int32_t maxval = std::numeric_limits<uint8_t>::max();
+ int32_t minval = std::numeric_limits<uint8_t>::min();
+ for (int32_t val = minval; val <= maxval; ++val)
+ {
+ const float dequantized = input_scale * (val - input_zero_point);
+ const float transformed = 1.0f / (1.0f + std::exp(-dequantized));
+ const float rescaled = std::round(transformed * inverse_scale);
+ const int32_t quantized = static_cast<int32_t>(rescaled + output_zero_point);
+ setTableValue(static_cast<uint8_t>(std::max(std::min(maxval, quantized), minval)),
+ static_cast<uint8_t>(val));
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Logistic.h b/compiler/luci-interpreter/src/kernels/Logistic.h
new file mode 100644
index 000000000..31de6adf0
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Logistic.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOGISTIC_H
+#define LUCI_INTERPRETER_KERNELS_LOGISTIC_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Logistic : public Kernel
+{
+public:
+ Logistic(const Tensor *input, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+ void populateLookupTable();
+ void setTableValue(uint8_t value, uint8_t idx) { _table[idx] = value; };
+ uint8_t getTableValue(uint8_t idx) const { return _table[idx]; };
+
+private:
+ uint8_t _table[256]{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOGISTIC_H
diff --git a/compiler/luci-interpreter/src/kernels/Logistic.test.cpp b/compiler/luci-interpreter/src/kernels/Logistic.test.cpp
new file mode 100644
index 000000000..17456a4a8
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Logistic.test.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Logistic.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(LogisticTest, Float)
+{
+ Shape input_shape{1, 2, 4, 1};
+ std::vector<float> input_data{
+ 0, -6, 2, 4, //
+ 3, -2, 10, 1, //
+ };
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Logistic kernel(&input_tensor, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 0.5, 0.002473, 0.880797, 0.982014, //
+ 0.952574, 0.119203, 0.999955, 0.731059, //
+ };
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ElementsAreArray(ArrayFloatNear(ref_output_data)));
+  // TODO: also check the shape of output_tensor.
+}
+
+TEST(LogisticTest, Uint8)
+{
+  // TODO: implement a GetDequantizedOutput helper and add a uint8 test case.
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/MaxPool2D.cpp b/compiler/luci-interpreter/src/kernels/MaxPool2D.cpp
new file mode 100644
index 000000000..afecf9058
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/MaxPool2D.cpp
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/MaxPool2D.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/pooling.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+MaxPool2D::MaxPool2D(const Tensor *input, Tensor *output, const Pool2DParams &params)
+ : KernelWithParams<Pool2DParams>({input}, {output}, params)
+{
+}
+
+void MaxPool2D::configure()
+{
+ assert(input()->element_type() == output()->element_type());
+ assert(input()->shape().num_dims() == 4);
+ const Shape &input_shape = input()->shape();
+ const int32_t batches = input_shape.dim(0);
+ const int32_t input_height = input_shape.dim(1);
+ const int32_t input_width = input_shape.dim(2);
+ const int32_t depth = input_shape.dim(3);
+
+ const int32_t output_height = computeOutputSize(_params.padding, input_height,
+ _params.filter_height, _params.stride_height);
+ const int32_t output_width =
+ computeOutputSize(_params.padding, input_width, _params.filter_width, _params.stride_width);
+
+ _padding_height =
+ computePadding(_params.stride_height, 1, input_height, _params.filter_height, output_height);
+ _padding_width =
+ computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width);
+
+ output()->resize({batches, output_height, output_width, depth});
+ if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S8)
+ {
+ assert(input()->scale() == output()->scale());
+ assert(input()->zero_point() == output()->zero_point());
+ }
+}
+
+void MaxPool2D::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void MaxPool2D::evalFloat() const
+{
+ float activation_min{};
+ float activation_max{};
+ calculateActivationRange(_params.activation, &activation_min, &activation_max);
+
+ tflite::PoolParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.filter_height = _params.filter_height;
+ params.filter_width = _params.filter_width;
+ params.float_activation_min = activation_min;
+ params.float_activation_max = activation_max;
+
+ tflite::reference_ops::MaxPool(params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void MaxPool2D::evalQuantized() const
+{
+ int32_t activation_min{};
+ int32_t activation_max{};
+ calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max);
+
+ tflite::PoolParams params{};
+ params.padding_values.height = _padding_height;
+ params.padding_values.width = _padding_width;
+ params.stride_height = _params.stride_height;
+ params.stride_width = _params.stride_width;
+ params.filter_height = _params.filter_height;
+ params.filter_width = _params.filter_width;
+ params.quantized_activation_min = activation_min;
+ params.quantized_activation_max = activation_max;
+
+ tflite::reference_ops::MaxPool(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ getTensorShape(output()), getTensorData<uint8_t>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/MaxPool2D.h b/compiler/luci-interpreter/src/kernels/MaxPool2D.h
new file mode 100644
index 000000000..7a59ff022
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/MaxPool2D.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MAXPOOL2D_H
+#define LUCI_INTERPRETER_KERNELS_MAXPOOL2D_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class MaxPool2D : public KernelWithParams<Pool2DParams>
+{
+public:
+ MaxPool2D(const Tensor *input, Tensor *output, const Pool2DParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+
+private:
+ int32_t _padding_height{};
+ int32_t _padding_width{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MAXPOOL2D_H
diff --git a/compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp b/compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp
new file mode 100644
index 000000000..390255d89
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/MaxPool2D.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(MaxPool2DTest, Float)
+{
+ Shape input_shape{1, 3, 5, 1};
+ std::vector<float> input_data{
+ 1, -1, 0, -2, 2, //
+ -7, -6, -5, -4, -3, //
+ 5, 4, 3, 6, 7, //
+ };
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 3;
+ params.stride_height = 1;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ MaxPool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ std::vector<float> ref_output_data{
+ 1, 2, //
+ 5, 6, //
+ };
+ std::initializer_list<int32_t> ref_output_shape{1, 2, 2, 1};
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST(MaxPool2DTest, Uint8)
+{
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375, 15.9375);
+ std::vector<float> input_data{
+ 0, -6, 12, 4, //
+ -3, -2, 10, 7, //
+ };
+ Tensor input_tensor{DataType::U8, {1, 2, 4, 1}, {{quant_param.first}, {quant_param.second}}, ""};
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+ std::vector<uint8_t> quantize_input =
+ quantize<uint8_t>(input_data, quant_param.first, quant_param.second);
+ input_tensor.writeData(quantize_input.data(), quantize_input.size() * sizeof(uint8_t));
+
+ Pool2DParams params{};
+ params.padding = Padding::VALID;
+ params.filter_height = 2;
+ params.filter_width = 2;
+ params.stride_height = 2;
+ params.stride_width = 2;
+ params.activation = Activation::RELU6;
+
+ MaxPool2D kernel(&input_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.0, 6.0};
+ std::initializer_list<int32_t> ref_output_shape{1, 1, 2, 1};
+ EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(),
+ output_tensor.zero_point()),
+ ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Mean.cpp b/compiler/luci-interpreter/src/kernels/Mean.cpp
new file mode 100644
index 000000000..2394e2c0e
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Mean.cpp
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Mean.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+static void resolveAxes(const int *axes_data, int num_axes, tflite::MeanParams *params)
+{
+ params->axis_count = num_axes;
+ for (int i = 0; i < num_axes; ++i)
+ {
+ params->axis[i] = static_cast<int16>(axes_data[i]);
+ }
+ for (int i = num_axes; i < 4; ++i)
+ {
+ params->axis[i] = 1;
+ }
+}
+
+// Returns the number of axes that will be reduced. Removes duplicates.
+static int getAxisReductionCount(const int *axes_data, int num_axes, int input_num_dims)
+{
+ int reduction_count = num_axes;
+ for (int i = 0; i < num_axes; ++i)
+ {
+ int current = axes_data[i] >= 0 ? axes_data[i] : axes_data[i] + input_num_dims;
+ assert(current >= 0 && current < input_num_dims);
+ for (int j = 0; j < i; j++)
+ {
+ int previous = axes_data[j] >= 0 ? axes_data[j] : axes_data[j] + input_num_dims;
+ // This checks for duplicate axis
+ if (current == previous)
+ {
+ --reduction_count;
+ break;
+ }
+ }
+ }
+ return reduction_count;
+}
+
+static Shape getOutputShape(const Shape &input_shape, const int *axes_data, int num_axes,
+ bool keep_dims)
+{
+ int input_num_dims = input_shape.num_dims();
+ if (input_num_dims == 0)
+ {
+ return Shape(0);
+ }
+
+ if (keep_dims)
+ {
+ Shape output_shape(input_num_dims);
+ for (int idx = 0; idx < input_num_dims; ++idx)
+ {
+ bool is_axis = false;
+ for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
+ {
+ if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
+ {
+ is_axis = true;
+ break;
+ }
+ }
+ if (is_axis)
+ {
+ output_shape.dim(idx) = 1;
+ }
+ else
+ {
+ output_shape.dim(idx) = input_shape.dim(idx);
+ }
+ }
+ return output_shape;
+ }
+ else
+ {
+ int num_reduce_axes = getAxisReductionCount(axes_data, num_axes, input_num_dims);
+ Shape output_shape(input_num_dims - num_reduce_axes);
+ int num_skip_axes = 0;
+ for (int idx = 0; idx < input_num_dims; ++idx)
+ {
+ bool is_axis = false;
+ for (int axis_idx = 0; axis_idx < num_axes; ++axis_idx)
+ {
+ if (axes_data[axis_idx] == idx || axes_data[axis_idx] + input_num_dims == idx)
+ {
+ ++num_skip_axes;
+ is_axis = true;
+ break;
+ }
+ }
+ if (!is_axis)
+ {
+ output_shape.dim(idx - num_skip_axes) = input_shape.dim(idx);
+ }
+ }
+ return output_shape;
+ }
+}
+
+Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, const ReducerParams &params)
+ : KernelWithParams<ReducerParams>({input, axes}, {output}, params)
+{
+}
+
+void Mean::configure()
+{
+ assert(input()->element_type() == output()->element_type());
+ assert(axes()->element_type() == DataType::S32);
+ const Shape &input_shape = input()->shape();
+ int input_num_dims = input_shape.num_dims();
+
+ const auto *axes_data = getTensorData<int32_t>(axes());
+ int num_axes = axes()->shape().num_elements();
+ assert(num_axes <= 4);
+
+ Shape output_shape = getOutputShape(input_shape, axes_data, num_axes, _params.keep_dims);
+ output()->resize(output_shape);
+
+ tflite::MeanParams params{};
+ resolveAxes(axes_data, num_axes, &params);
+ const bool need_temporaries =
+ !(_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
+ ((params.axis[0] == 1 && params.axis[1] == 2) ||
+ (params.axis[0] == 2 && params.axis[1] == 1)));
+ if (need_temporaries)
+ {
+ _temp_index =
+ std::make_unique<Tensor>(DataType::S32, Shape(input_num_dims), AffineQuantization{}, "");
+ _resolved_axes =
+ std::make_unique<Tensor>(DataType::S32, Shape(num_axes), AffineQuantization{}, "");
+ _temp_sum = std::make_unique<Tensor>(input()->element_type(), output()->shape(),
+ AffineQuantization{}, "");
+ }
+}
+
+void Mean::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Mean::evalFloat() const
+{
+ const Shape &input_shape = input()->shape();
+ int input_num_dims = input_shape.num_dims();
+ const auto *axes_data = getTensorData<int32_t>(axes());
+ int num_axes = axes()->shape().num_elements();
+
+ tflite::MeanParams params{};
+ resolveAxes(axes_data, num_axes, &params);
+
+ // Defer to specialized implementation for 4D Mean across axes 1 & 2.
+ if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
+ ((params.axis[0] == 1 && params.axis[1] == 2) ||
+ (params.axis[0] == 2 && params.axis[1] == 1)))
+ {
+ tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData<float>(input()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Mean(
+ getTensorData<float>(input()), getTensorShape(input()).DimsData(),
+ input()->shape().num_dims(), getTensorData<float>(output()),
+ getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
+ _params.keep_dims, getTensorData<int>(_temp_index.get()),
+ getTensorData<int>(_resolved_axes.get()), getTensorData<float>(_temp_sum.get()));
+ }
+}
+
+void Mean::evalQuantized() const
+{
+ const Shape &input_shape = input()->shape();
+ int input_num_dims = input_shape.num_dims();
+ const auto *axes_data = getTensorData<int32_t>(axes());
+ int num_axes = axes()->shape().num_elements();
+
+ tflite::MeanParams params{};
+ resolveAxes(axes_data, num_axes, &params);
+
+ // Defer to specialized implementation for 4D Mean across axes 1 & 2.
+ if (_params.keep_dims && input_num_dims == 4 && params.axis_count == 2 &&
+ ((params.axis[0] == 1 && params.axis[1] == 2) ||
+ (params.axis[0] == 2 && params.axis[1] == 1)))
+ {
+ tflite::reference_ops::Mean(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ input()->zero_point(), input()->scale(), getTensorShape(output()),
+ getTensorData<uint8_t>(output()), output()->zero_point(),
+ output()->scale());
+ }
+ else if (input()->zero_point() == output()->zero_point() && input()->scale() == output()->scale())
+ {
+ tflite::reference_ops::Mean(
+ getTensorData<uint8_t>(input()), getTensorShape(input()).DimsData(),
+ input()->shape().num_dims(), getTensorData<uint8_t>(output()),
+ getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
+ _params.keep_dims, getTensorData<int>(_temp_index.get()),
+ getTensorData<int>(_resolved_axes.get()), getTensorData<int>(_temp_sum.get()));
+ }
+ else
+ {
+ tflite::reference_ops::QuantizedMeanOrSum<>(
+ getTensorData<uint8_t>(input()), input()->zero_point(), input()->scale(),
+ getTensorShape(input()).DimsData(), input()->shape().num_dims(),
+ getTensorData<uint8_t>(output()), output()->zero_point(), output()->scale(),
+ getTensorShape(output()).DimsData(), output()->shape().num_dims(), axes_data, num_axes,
+ _params.keep_dims, getTensorData<int>(_temp_index.get()),
+ getTensorData<int>(_resolved_axes.get()), getTensorData<int>(_temp_sum.get()),
+ /*compute_sum=*/false);
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Mean.h b/compiler/luci-interpreter/src/kernels/Mean.h
new file mode 100644
index 000000000..9cc793c72
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Mean.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MEAN_H
+#define LUCI_INTERPRETER_KERNELS_MEAN_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <memory>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Kernel implementing the MEAN reduction operation.
//
// Inputs:  [0] input - tensor to reduce (FLOAT32 or quantized U8, per Mean.cpp)
//          [1] axes  - S32 tensor listing the dimensions to reduce over
// Output:  [0] the reduced tensor; reduced dimensions are kept as size 1 when
//          params.keep_dims is set, dropped otherwise.
class Mean : public KernelWithParams<ReducerParams>
{
public:
  Mean(const Tensor *input, const Tensor *axes, Tensor *output, const ReducerParams &params);

  const Tensor *input() const { return _inputs[0]; }
  const Tensor *axes() const { return _inputs[1]; }
  Tensor *output() const { return _outputs[0]; }

  void configure() override;
  void execute() const override;

private:
  void evalFloat() const;
  void evalQuantized() const;

private:
  // Scratch tensors handed to the generic TFLite reduce implementation in the
  // eval paths (see Mean.cpp). Presumably allocated in configure(), which is
  // not visible in this chunk - confirm there.
  std::unique_ptr<Tensor> _temp_index;
  std::unique_ptr<Tensor> _resolved_axes;
  std::unique_ptr<Tensor> _temp_sum;
};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MEAN_H
diff --git a/compiler/luci-interpreter/src/kernels/Mean.test.cpp b/compiler/luci-interpreter/src/kernels/Mean.test.cpp
new file mode 100644
index 000000000..f4e411ca4
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Mean.test.cpp
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Mean.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(MeanTest, FloatKeepDims)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ std::vector<int32_t> axis_data{0, 2};
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data);
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ReducerParams params{};
+ params.keep_dims = true;
+
+ Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ std::vector<float> ref_output_data{10.5, 12.5, 14.5};
+ std::initializer_list<int32_t> ref_output_shape{1, 3, 1};
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST(MeanTest, FloatKeepDims4DMean)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ std::vector<int32_t> axis_data{1, 2};
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 3, 2}, input_data);
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({2}, axis_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ReducerParams params{};
+ params.keep_dims = true;
+
+ Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ std::vector<float> ref_output_data{6, 7, 18, 19};
+ std::initializer_list<int32_t> ref_output_shape{2, 1, 1, 2};
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST(MeanTest, FloatNotKeepDims)
+{
+ std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
+ 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+ 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+
+ std::vector<int32_t> axis_data{1, 0, -3, -3};
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({4, 3, 2}, input_data);
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({4}, axis_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ ReducerParams params{};
+ params.keep_dims = false;
+
+ Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ std::vector<float> ref_output_data{12, 13};
+ std::initializer_list<int32_t> ref_output_shape{2};
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST(MeanTest, Uint8KeepDims)
+{
+ float kQuantizedTolerance = getTolerance(-1.0, 1.0, 255);
+ std::vector<float> input_data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
+
+ std::vector<int32_t> axis_data{1};
+ Tensor input_tensor{DataType::U8, {3, 2}, {{quant_param.first}, {quant_param.second}}, ""};
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data);
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+ std::vector<uint8_t> quantize_input =
+ quantize<uint8_t>(input_data, quant_param.first, quant_param.second);
+ input_tensor.writeData(quantize_input.data(), quantize_input.size() * sizeof(uint8_t));
+
+ ReducerParams params{};
+ params.keep_dims = true;
+
+ Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.3, 0.35, 0.55};
+ std::initializer_list<int32_t> ref_output_shape{3, 1};
+ EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(),
+ output_tensor.zero_point()),
+ ElementsAreArray(ArrayFloatNear(ref_output_data, kQuantizedTolerance)));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+TEST(MeanTest, Uint8NotKeepDims)
+{
+ float kQuantizedTolerance = getTolerance(-1.0, 1.0, 255);
+ std::vector<float> input_data = {0.4, 0.2, 0.3, 0.4, 0.5, 0.6};
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
+
+ std::vector<int32_t> axis_data{1};
+ Tensor input_tensor{DataType::U8, {1, 3, 2}, {{quant_param.first}, {quant_param.second}}, ""};
+ Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data);
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+ std::vector<uint8_t> quantize_input =
+ quantize<uint8_t>(input_data, quant_param.first, quant_param.second);
+ input_tensor.writeData(quantize_input.data(), quantize_input.size() * sizeof(uint8_t));
+
+ ReducerParams params{};
+ params.keep_dims = false;
+
+ Mean kernel(&input_tensor, &axis_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0.4, 0.4};
+ std::initializer_list<int32_t> ref_output_shape{1, 2};
+ EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(),
+ output_tensor.zero_point()),
+ ElementsAreArray(ArrayFloatNear(ref_output_data, kQuantizedTolerance)));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Mul.cpp b/compiler/luci-interpreter/src/kernels/Mul.cpp
new file mode 100644
index 000000000..a6e721a09
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Mul.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Mul.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Binds both operands and the output; params carry the fused activation
// applied by the eval paths.
Mul::Mul(const Tensor *input1, const Tensor *input2, Tensor *output, const MulParams &params)
  : KernelWithParams<MulParams>({input1, input2}, {output}, params)
{
}
+
+void Mul::configure()
+{
+ assert(input1()->element_type() == input2()->element_type());
+ output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape()));
+}
+
+void Mul::execute() const
+{
+ switch (input1()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void Mul::evalFloat() const
+{
+ float activation_min{};
+ float activation_max{};
+ calculateActivationRange(_params.activation, &activation_min, &activation_max);
+
+ tflite::ArithmeticParams params{};
+ params.float_activation_min = activation_min;
+ params.float_activation_max = activation_max;
+
+ const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes(
+ getTensorShape(input1()), getTensorShape(input2()), &params);
+
+ if (need_broadcast)
+ {
+ tflite::reference_ops::BroadcastMul4DSlow(
+ params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()),
+ getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output()));
+ }
+ else
+ {
+ tflite::reference_ops::Mul(params, getTensorShape(input1()), getTensorData<float>(input1()),
+ getTensorShape(input2()), getTensorData<float>(input2()),
+ getTensorShape(output()), getTensorData<float>(output()));
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Mul.h b/compiler/luci-interpreter/src/kernels/Mul.h
new file mode 100644
index 000000000..e46160bcb
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Mul.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_MUL_H
+#define LUCI_INTERPRETER_KERNELS_MUL_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <cstdint>
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Element-wise multiplication with shape broadcasting and an optional fused
// activation. Only FLOAT32 is implemented (see Mul::execute in Mul.cpp).
class Mul : public KernelWithParams<MulParams>
{
public:
  Mul(const Tensor *input1, const Tensor *input2, Tensor *output, const MulParams &params);

  const Tensor *input1() const { return _inputs[0]; }
  const Tensor *input2() const { return _inputs[1]; }
  Tensor *output() const { return _outputs[0]; }

  void configure() override;
  void execute() const override;

private:
  // FLOAT32 path; selects the broadcasting or the plain implementation.
  void evalFloat() const;
};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_MUL_H
diff --git a/compiler/luci-interpreter/src/kernels/Mul.test.cpp b/compiler/luci-interpreter/src/kernels/Mul.test.cpp
new file mode 100644
index 000000000..f2255ac3f
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Mul.test.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Mul.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(MulTest, Float)
+{
+ Shape base_shape = {2, 3, 1, 2};
+ std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+ std::vector<std::vector<float>> test_outputs = {
+ {0.00f, 0.69f, 0.12f, 1.15f, 0.00f, 2.07f, 0.18f, 0.15f, 0.00f, 0.25f, 0.90f, 0.45f,
+ 0.16f, 0.00f, 0.00f, 0.00f, 0.80f, 0.00f, 0.24f, 0.84f, 0.00f, 1.40f, 1.20f, 2.52f,
+ 0.00f, 0.00f, 0.64f, 0.00f, 0.00f, 0.00f, 0.14f, 0.00f, 0.00f, 0.00f, 0.70f, 0.00f},
+ {0.00f, 0.69f, 0.00f, 0.25f, 0.80f, 0.00f, 0.24f, 0.84f, 0.64f, 0.00f, 0.70f, 0.00f},
+ {0.00f, 0.46f, 0.00f, 0.69f, 0.12f, 0.00f, 0.18f, 0.10f, 0.27f, 0.15f, 0.00f, 0.00f,
+ 0.16f, 0.00f, 0.24f, 0.00f, 0.00f, 0.44f, 0.60f, 1.40f, 1.20f, 2.80f, 1.08f, 2.52f,
+ 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.00f, 0.35f, 0.00f, 0.70f, 0.00f, 0.63f, 0.00f},
+ {0.00f, 0.46f, 0.27f, 0.15f, 0.00f, 0.44f, 0.60f, 1.40f, 0.00f, 0.00f, 0.63f, 0.00f}};
+ std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f,
+ 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f};
+ std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f};
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data);
+ Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ArrayFloatNear(test_outputs[i], 0.0001f)))
+ << "With shape number " << i;
+ }
+ // Re-run with exchanged inputs.
+ for (size_t i = 0; i < test_shapes.size(); ++i)
+ {
+ Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data);
+ Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ MulParams params{};
+ params.activation = Activation::RELU;
+
+ Mul kernel(&input1_tensor, &input2_tensor, &output_tensor, params);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ::testing::ElementsAreArray(ArrayFloatNear(test_outputs[i], 0.0001f)))
+ << "With shape number " << i;
+ }
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Pad.cpp b/compiler/luci-interpreter/src/kernels/Pad.cpp
new file mode 100644
index 000000000..bdf3a2a95
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Pad.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
#include "kernels/Pad.h"

#include "kernels/Utils.h"

#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>

#include <cassert>
#include <limits>
#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// Binds the data tensor, the paddings tensor and the output.
Pad::Pad(const Tensor *input, const Tensor *paddings, Tensor *output)
    : Kernel({input, paddings}, {output})
{
}
+
+void Pad::configure()
+{
+ const Shape &input_shape = input()->shape();
+ const int num_dims = input_shape.num_dims();
+
+ if (num_dims > 4)
+ throw std::runtime_error("Unsupported number of dimensions.");
+
+ assert(output()->element_type() == input()->element_type());
+ assert(paddings()->element_type() == DataType::S32);
+ // Paddings shape should be [N, 2].
+ assert(paddings()->shape().num_dims() == 2);
+ assert(paddings()->shape().dim(0) == num_dims);
+ assert(paddings()->shape().dim(1) == 2);
+
+ Shape output_shape(num_dims);
+ const auto *paddings_data = getTensorData<int32_t>(paddings());
+ for (int i = 0; i < num_dims; ++i)
+ {
+ const int32_t padding_before = paddings_data[i * 2];
+ const int32_t padding_after = paddings_data[i * 2 + 1];
+ assert(padding_before >= 0 && padding_after >= 0);
+ output_shape.dim(i) = input_shape.dim(i) + padding_before + padding_after;
+ }
+
+ output()->resize(output_shape);
+}
+
+void Pad::execute() const
+{
+ const int num_dims = input()->shape().num_dims();
+
+ tflite::PadParams params{};
+ params.left_padding_count = num_dims;
+ params.right_padding_count = num_dims;
+
+ const auto *paddings_data = getTensorData<int32_t>(paddings());
+ for (int i = num_dims - 1; i >= 0; --i)
+ {
+ params.left_padding[i] = paddings_data[i * 2];
+ params.right_padding[i] = paddings_data[i * 2 + 1];
+ }
+
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ {
+ const float pad_value = 0.0f;
+ tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<float>(input()),
+ &pad_value, getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ }
+ case DataType::U8:
+ {
+ assert(output()->zero_point() >= std::numeric_limits<uint8_t>::min());
+ assert(output()->zero_point() <= std::numeric_limits<uint8_t>::max());
+ const auto pad_value = static_cast<uint8_t>(output()->zero_point());
+ tflite::reference_ops::Pad(params, getTensorShape(input()), getTensorData<uint8_t>(input()),
+ &pad_value, getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ }
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Pad.h b/compiler/luci-interpreter/src/kernels/Pad.h
new file mode 100644
index 000000000..e05b47f29
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Pad.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_PAD_H
+#define LUCI_INTERPRETER_KERNELS_PAD_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// PAD kernel: surrounds the input with padding values.
//
// Inputs:  [0] input    - tensor to pad (FLOAT32 or U8, see Pad.cpp)
//          [1] paddings - S32 tensor of shape [num_dims, 2] with the
//                         (before, after) amount for each dimension
// Output:  [0] padded tensor; float inputs are padded with 0, quantized
//          inputs with the output zero point (see Pad::execute).
class Pad : public Kernel
{
public:
  Pad(const Tensor *input, const Tensor *paddings, Tensor *output);

  const Tensor *input() const { return _inputs[0]; }
  const Tensor *paddings() const { return _inputs[1]; }
  Tensor *output() const { return _outputs[0]; }

  void configure() override;
  void execute() const override;
};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_PAD_H
diff --git a/compiler/luci-interpreter/src/kernels/Pad.test.cpp b/compiler/luci-interpreter/src/kernels/Pad.test.cpp
new file mode 100644
index 000000000..15fcd0da3
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Pad.test.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Pad.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
// Tolerance of one uint8 quantization step: 255 intervals cover [min, max].
float GetTolerance(float min, float max)
{
  return (max - min) / 255.0;
}
+
+TEST(Pad, Uint8)
+{
+ float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+ std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f);
+ std::vector<float> input_data{-0.8, 0.2, 0.9, 0.7, 0.1, -0.3};
+ std::vector<int32_t> paddings_data{0, 0, 0, 2, 1, 3, 0, 0};
+ Tensor input_tensor{DataType::U8, {1, 2, 3, 1}, {{quant_param.first}, {quant_param.second}}, ""};
+ Tensor paddings_tensor = makeInputTensor<DataType::S32>({4, 2}, paddings_data);
+ Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second);
+ std::vector<uint8_t> quantize_input =
+ quantize<uint8_t>(input_data, quant_param.first, quant_param.second);
+ input_tensor.writeData(quantize_input.data(), quantize_input.size() * sizeof(uint8_t));
+
+ Pad kernel(&input_tensor, &paddings_tensor, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0, -0.8, 0.2, 0.9, 0, 0, 0, 0, 0.7, 0.1, -0.3, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+ EXPECT_THAT(dequantize(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(),
+ output_tensor.zero_point()),
+ ElementsAreArray(ArrayFloatNear(ref_output_data, kQuantizedTolerance)));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 7, 1}));
+}
+
+TEST(Pad, Float)
+{
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6};
+ std::vector<int32_t> paddings_data{1, 0, 0, 2, 0, 3, 0, 0};
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 2, 3, 1}, input_data);
+ Tensor paddings_tensor = makeInputTensor<DataType::S32>({4, 2}, paddings_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Pad kernel(&input_tensor, &paddings_tensor, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ std::vector<float> ref_output_data{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0, 0, 0, 4, 5,
+ 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+ std::initializer_list<int32_t> ref_output_shape{2, 4, 6, 1};
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ElementsAreArray(ArrayFloatNear(ref_output_data)));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Reshape.cpp b/compiler/luci-interpreter/src/kernels/Reshape.cpp
new file mode 100644
index 000000000..d88b5392a
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Reshape.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Reshape.h"
+
+#include <cassert>
+#include <cstring>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+static Shape extractShapeFromTensor(const Tensor *tensor)
+{
+ assert(tensor->element_type() == DataType::S32);
+ Shape shape(tensor->shape().num_elements());
+ const auto *shape_data = tensor->data<int32_t>();
+ for (int i = 0; i < tensor->shape().num_elements(); ++i)
+ {
+ shape.dim(i) = shape_data[i];
+ }
+ return shape;
+}
+
+static void resolveUnknownDimension(const Shape &input_shape, Shape *output_shape)
+{
+ const int32_t num_input_elements = input_shape.num_elements();
+ int32_t num_output_elements = 1;
+ int unknown_dim_index = -1;
+ for (int i = 0; i < output_shape->num_dims(); ++i)
+ {
+ const int32_t value = output_shape->dim(i);
+ if (value == -1)
+ {
+ assert(unknown_dim_index == -1);
+ unknown_dim_index = i;
+ }
+ else
+ {
+ num_output_elements *= value;
+ }
+ }
+ if (unknown_dim_index != -1)
+ {
+ output_shape->dim(unknown_dim_index) = num_input_elements / num_output_elements;
+ num_output_elements *= output_shape->dim(unknown_dim_index);
+ }
+ assert(num_output_elements == num_input_elements);
+}
+
// Binds the data tensor, the target-shape tensor and the output.
Reshape::Reshape(const Tensor *input, const Tensor *shape, Tensor *output)
    : Kernel({input, shape}, {output})
{
}
+
void Reshape::configure()
{
  // NOTE(review): the shape tensor is read here, at configure time - this
  // assumes its data is already available (i.e. it is constant); confirm
  // against how the runtime invokes configure().
  Shape output_shape = extractShapeFromTensor(shape());
  // A single -1 entry, if present, is inferred from the input element count.
  resolveUnknownDimension(input()->shape(), &output_shape);
  output()->resize(output_shape);
}
+
+void Reshape::execute() const
+{
+ const auto *input_data = input()->data<void>();
+ auto *output_data = output()->data<void>();
+
+ const size_t element_size = getDataTypeSize(input()->element_type());
+ const int32_t num_elements = input()->shape().num_elements();
+ std::memcpy(output_data, input_data, num_elements * element_size);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Reshape.h b/compiler/luci-interpreter/src/kernels/Reshape.h
new file mode 100644
index 000000000..99b947f77
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Reshape.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_RESHAPE_H
+#define LUCI_INTERPRETER_KERNELS_RESHAPE_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
// RESHAPE kernel: gives the input data a new shape.
//
// Inputs:  [0] input - data tensor
//          [1] shape - S32 tensor with the target dimensions; one entry may
//                      be -1 and is then inferred from the element count
// Output:  [0] tensor with the requested shape and the input's data copied
//          verbatim (see Reshape::execute).
class Reshape : public Kernel
{
public:
  Reshape(const Tensor *input, const Tensor *shape, Tensor *output);

  const Tensor *input() const { return _inputs[0]; }
  const Tensor *shape() const { return _inputs[1]; }
  Tensor *output() const { return _outputs[0]; }

  void configure() override;
  void execute() const override;
};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_RESHAPE_H
diff --git a/compiler/luci-interpreter/src/kernels/Reshape.test.cpp b/compiler/luci-interpreter/src/kernels/Reshape.test.cpp
new file mode 100644
index 000000000..7255b8132
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Reshape.test.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Reshape.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// TODO Test types other than FLOAT32.
+
+TEST(ReshapeTest, Regular)
+{
+ Shape input_shape{1, 2, 2, 3};
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ Shape shape_shape{2};
+ std::vector<int32_t> shape_data{3, 4};
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor shape_tensor = makeInputTensor<DataType::S32>(shape_shape, shape_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Reshape kernel(&input_tensor, &shape_tensor, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ElementsAreArray(ArrayFloatNear(input_data)));
+}
+
+TEST(ReshapeTest, UnknownDimension)
+{
+ Shape input_shape{2, 1, 2, 3};
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ Shape shape_shape{3};
+ std::vector<int32_t> shape_data{2, -1, 2};
+ Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+ Tensor shape_tensor = makeInputTensor<DataType::S32>(shape_shape, shape_data);
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ Reshape kernel(&input_tensor, &shape_tensor, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ElementsAreArray(ArrayFloatNear(input_data)));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Softmax.cpp b/compiler/luci-interpreter/src/kernels/Softmax.cpp
new file mode 100644
index 000000000..2fb7f3f2c
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Softmax.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Softmax.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/softmax.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// Softmax kernel: thin wrapper over the TFLite reference implementation.
+Softmax::Softmax(const Tensor *input, Tensor *output, const SoftmaxParams &params)
+    : KernelWithParams<SoftmaxParams>({input}, {output}, params)
+{
+}
+
+// Validates that input and output element types agree and shapes the output
+// like the input (softmax is shape-preserving).
+// NOTE(review): validation is assert-only, so it disappears in NDEBUG builds.
+void Softmax::configure()
+{
+  assert(input()->element_type() == output()->element_type());
+  output()->resize(input()->shape());
+}
+
+// Dispatches on the element type; only FLOAT32 is implemented here.
+void Softmax::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+// Float path: forwards 'beta' (the exponent scale factor) to the TFLite
+// reference op, which computes softmax over the innermost dimension.
+void Softmax::evalFloat() const
+{
+  tflite::SoftmaxParams params{};
+  params.beta = _params.beta;
+
+  tflite::reference_ops::Softmax(params, getTensorShape(input()), getTensorData<float>(input()),
+                                 getTensorShape(output()), getTensorData<float>(output()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Softmax.h b/compiler/luci-interpreter/src/kernels/Softmax.h
new file mode 100644
index 000000000..2e4eda492
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Softmax.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SOFTMAX_H
+#define LUCI_INTERPRETER_KERNELS_SOFTMAX_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// SOFTMAX kernel (single input, single output). 'SoftmaxParams' carries the
+// beta scale factor applied to logits before exponentiation.
+class Softmax : public KernelWithParams<SoftmaxParams>
+{
+public:
+  Softmax(const Tensor *input, Tensor *output, const SoftmaxParams &params);
+
+  // Accessors for the single input/output wired in the constructor.
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  // FLOAT32 evaluation path (the only type currently supported).
+  void evalFloat() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SOFTMAX_H
diff --git a/compiler/luci-interpreter/src/kernels/Softmax.test.cpp b/compiler/luci-interpreter/src/kernels/Softmax.test.cpp
new file mode 100644
index 000000000..2193c3e83
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Softmax.test.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Softmax.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Checks float softmax with beta = 0.1 against precomputed reference values.
+// Softmax is applied along the innermost dimension (rows of 3 below), so each
+// row of the expected output sums to ~1.
+TEST(SoftmaxTest, Float)
+{
+  Shape input_shape{2, 1, 2, 3};
+  std::vector<float> input_data{
+      5, -9, 8,  //
+      -7, 2, -4, //
+      1, -2, 9,  //
+      3, -6, -1, //
+  };
+  Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data);
+  Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+  SoftmaxParams params{};
+  params.beta = 0.1;
+
+  Softmax kernel(&input_tensor, &output_tensor, params);
+  kernel.configure();
+  kernel.execute();
+
+  // Reference values rounded to 5 decimals; compared with ArrayFloatNear.
+  std::vector<float> ref_output_data{
+      0.38514, 0.09497, 0.51989, //
+      0.20792, 0.51141, 0.28067, //
+      0.25212, 0.18678, 0.56110, //
+      0.48149, 0.19576, 0.32275, //
+  };
+  EXPECT_THAT(extractTensorData<float>(output_tensor),
+              ElementsAreArray(ArrayFloatNear(ref_output_data)));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/SpaceToDepth.cpp b/compiler/luci-interpreter/src/kernels/SpaceToDepth.cpp
new file mode 100644
index 000000000..6a5bd7cf8
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/SpaceToDepth.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SpaceToDepth.h"
+#include "Utils.h"
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// SPACE_TO_DEPTH kernel: rearranges spatial blocks of size block_size x
+// block_size into the depth dimension, using the TFLite optimized op.
+SpaceToDepth::SpaceToDepth(const Tensor *input, Tensor *output, const SpaceToDepthParams &params)
+    : KernelWithParams<SpaceToDepthParams>({input}, {output}, params)
+{
+}
+
+// Validates rank/type constraints and computes the output shape:
+//   NHWC (N, H, W, C) -> (N, H/bs, W/bs, C*bs*bs).
+// NOTE(review): all validation is assert-only and vanishes in NDEBUG builds;
+// a non-divisible H or W would then go undetected.
+void SpaceToDepth::configure()
+{
+  assert(input()->shape().num_dims() == 4);
+  assert(output()->element_type() == DataType::FLOAT32 ||
+         output()->element_type() == DataType::U8 || output()->element_type() == DataType::S8 ||
+         output()->element_type() == DataType::S32 || output()->element_type() == DataType::S64);
+  assert(input()->element_type() == output()->element_type());
+
+  const int block_size = params().block_size;
+  const int32_t input_height = input()->shape().dim(1);
+  const int32_t input_width = input()->shape().dim(2);
+  int32_t output_height = input_height / block_size;
+  int32_t output_width = input_width / block_size;
+
+  // Both spatial dims must be exact multiples of block_size.
+  assert(input_height == output_height * block_size);
+  assert(input_width == output_width * block_size);
+
+  Shape output_shape(4);
+  output_shape.dim(0) = input()->shape().dim(0);
+  output_shape.dim(1) = output_height;
+  output_shape.dim(2) = output_width;
+  output_shape.dim(3) = input()->shape().dim(3) * block_size * block_size;
+
+  output()->resize(output_shape);
+}
+
+// Dispatches to the TFLite optimized op; only FLOAT32 and U8 are wired up,
+// even though configure() admits S8/S32/S64 as well.
+void SpaceToDepth::execute() const
+{
+  tflite::SpaceToDepthParams op_params{};
+  op_params.block_size = params().block_size;
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      tflite::optimized_ops::SpaceToDepth(op_params, getTensorShape(input()),
+                                          getTensorData<float>(input()), getTensorShape(output()),
+                                          getTensorData<float>(output()));
+      break;
+    case DataType::U8:
+      tflite::optimized_ops::SpaceToDepth(op_params, getTensorShape(input()),
+                                          getTensorData<uint8_t>(input()), getTensorShape(output()),
+                                          getTensorData<uint8_t>(output()));
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/SpaceToDepth.h b/compiler/luci-interpreter/src/kernels/SpaceToDepth.h
new file mode 100644
index 000000000..e66316b11
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/SpaceToDepth.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SPACETODEPTH_H
+#define LUCI_INTERPRETER_KERNELS_SPACETODEPTH_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+#include <vector>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// SPACE_TO_DEPTH kernel (single input, single output). 'SpaceToDepthParams'
+// carries the spatial block size moved into the depth dimension.
+class SpaceToDepth : public KernelWithParams<SpaceToDepthParams>
+{
+public:
+  SpaceToDepth(const Tensor *input, Tensor *output, const SpaceToDepthParams &params);
+
+  // Accessors for the single input/output wired in the constructor.
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SPACETODEPTH_H
diff --git a/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp b/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp
new file mode 100644
index 000000000..e4a0fd642
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/SpaceToDepth.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Typed test fixture: the same case runs for float and uint8_t.
+template <typename T> class SpaceToDepthTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_CASE(SpaceToDepthTest, DataTypes);
+
+// With block_size 2 on a 1x2x2x2 input, the whole 2x2 spatial extent collapses
+// into depth, so the output is 1x1x1x8 and the data is unchanged in order.
+TYPED_TEST(SpaceToDepthTest, SimpleCase)
+{
+  std::vector<TypeParam> input_data{1, 5, 6, 7, 2, 3, 4, 8};
+  Shape input_shape{1, 2, 2, 2};
+  Tensor input_tensor{getElementType<TypeParam>(), input_shape, {{}, {}}, ""};
+  input_tensor.writeData(input_data.data(), input_data.size() * sizeof(TypeParam));
+  std::vector<TypeParam> output_data{1, 5, 6, 7, 2, 3, 4, 8};
+  std::vector<int32_t> output_shape{1, 1, 1, 8};
+  Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>());
+
+  SpaceToDepthParams params{};
+  params.block_size = 2;
+
+  SpaceToDepth kernel(&input_tensor, &output_tensor, params);
+  kernel.configure();
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<TypeParam>(output_tensor),
+              ::testing::ElementsAreArray(output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Split.cpp b/compiler/luci-interpreter/src/kernels/Split.cpp
new file mode 100644
index 000000000..325b1c22f
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Split.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Split.h"
+
+#include "Utils.h"
+
+#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// SPLIT kernel: slices the input into _outputs.size() equal parts along a
+// runtime-supplied axis, using the TFLite optimized op.
+Split::Split(const Tensor *axis, const Tensor *input, std::vector<Tensor *> outputs)
+    : Kernel({axis, input}, std::move(outputs))
+{
+}
+
+// Resolves the (possibly negative) axis and shapes each output as the input
+// with the split axis divided evenly.
+// NOTE(review): reads the axis tensor's data here, which assumes the axis is a
+// constant tensor whose data is available at configure time — confirm callers
+// guarantee this. Also assert-only: an uneven split is undetected in NDEBUG.
+void Split::configure()
+{
+  assert(axis()->shape().num_elements() == 1);
+  _axis_value = getTensorData<int32_t>(axis())[0];
+  // Negative axis counts from the back, as in TFLite.
+  if (_axis_value < 0)
+    _axis_value += input()->shape().num_dims();
+  assert(_axis_value >= 0 && _axis_value < input()->shape().num_dims());
+
+  const int32_t input_size = input()->shape().dim(_axis_value);
+  assert(input_size % _outputs.size() == 0);
+  const int32_t slice_size = input_size / _outputs.size();
+
+  // Every output shares the input shape except along the split axis.
+  Shape output_shape = input()->shape();
+  output_shape.dim(_axis_value) = slice_size;
+  for (Tensor *output : _outputs)
+  {
+    output->resize(output_shape);
+  }
+}
+
+void Split::execute() const
+{
+  tflite::SplitParams params{};
+  params.num_split = _outputs.size();
+  params.axis = _axis_value;
+
+// Expands to one typed invocation of the TFLite op; VectorOfTensors adapts
+// our Tensor* list to the raw shape/data arrays the op expects.
+#define TF_LITE_SPLIT(scalar)                                                                      \
+  {                                                                                                \
+    VectorOfTensors<scalar, false> all_outputs(_outputs);                                          \
+    tflite::optimized_ops::Split(params, getTensorShape(input()), getTensorData<scalar>(input()),  \
+                                 all_outputs.shapes(), all_outputs.data());                        \
+  }
+
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      TF_LITE_SPLIT(float);
+      break;
+    case DataType::U8:
+      TF_LITE_SPLIT(uint8_t);
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+#undef TF_LITE_SPLIT
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Split.h b/compiler/luci-interpreter/src/kernels/Split.h
new file mode 100644
index 000000000..9542b1e56
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Split.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SPLIT_H
+#define LUCI_INTERPRETER_KERNELS_SPLIT_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// SPLIT kernel: two inputs (scalar axis tensor, data tensor) and one output
+// per split. The axis is resolved in configure() and cached in _axis_value.
+class Split : public Kernel
+{
+public:
+  Split(const Tensor *axis, const Tensor *input, std::vector<Tensor *> outputs);
+
+  const Tensor *axis() const { return _inputs[0]; }
+  const Tensor *input() const { return _inputs[1]; }
+  Tensor *output(int index) const { return _outputs[index]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  // Non-negative split axis, resolved from the axis tensor in configure().
+  int32_t _axis_value{};
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SPLIT_H
diff --git a/compiler/luci-interpreter/src/kernels/Split.test.cpp b/compiler/luci-interpreter/src/kernels/Split.test.cpp
new file mode 100644
index 000000000..11d0b1ea9
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Split.test.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Split.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Shared driver: builds an axis tensor and 'num_splits' output tensors, runs
+// the kernel, and checks each output against its expected slice. All outputs
+// share 'output_shape' because Split always divides the axis evenly.
+template <typename T>
+void Check(int axis, int num_splits, std::initializer_list<int32_t> input_shape,
+           std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data,
+           std::vector<std::vector<T>> output_data, DataType element_type)
+{
+  Tensor axis_tensor = makeInputTensor<DataType::S32>({}, {axis});
+  Tensor input_tensor{element_type, input_shape, {}, ""};
+  input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(T));
+
+  // Tensors are stored by value; pointer list below refers into this vector,
+  // so it must not reallocate after the pointers are taken (hence reserve).
+  std::vector<Tensor> output_tensors;
+  output_tensors.reserve(num_splits);
+  for (int i = 0; i < num_splits; ++i)
+  {
+    output_tensors.emplace_back(makeOutputTensor(element_type));
+  }
+
+  std::vector<Tensor *> output_tensor_ptrs(num_splits);
+  for (int i = 0; i < num_splits; ++i)
+  {
+    output_tensor_ptrs[i] = &output_tensors[i];
+  }
+
+  Split kernel(&axis_tensor, &input_tensor, std::move(output_tensor_ptrs));
+  kernel.configure();
+  kernel.execute();
+
+  for (int i = 0; i < num_splits; ++i)
+  {
+    EXPECT_THAT(extractTensorData<T>(output_tensors[i]),
+                ::testing::ElementsAreArray(output_data[i]));
+  }
+}
+
+// Typed fixture: each case runs for float and uint8_t.
+template <typename T> class SplitTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_CASE(SplitTest, DataTypes);
+
+// Splits a 2x2x2x2 tensor in half along each of the four axes in turn.
+TYPED_TEST(SplitTest, FourDimensional)
+{
+  Check<TypeParam>(/*axis=*/0, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2},
+                   {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+                   {
+                       {1, 2, 3, 4, 5, 6, 7, 8},       //
+                       {9, 10, 11, 12, 13, 14, 15, 16}, //
+                   },
+                   getElementType<TypeParam>());
+  Check<TypeParam>(
+      /*axis=*/1, /*num_splits=*/2, {2, 2, 2, 2}, {2, 1, 2, 2},
+      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+      {
+          {1, 2, 3, 4, 9, 10, 11, 12}, //
+          {5, 6, 7, 8, 13, 14, 15, 16}, //
+      },
+      getElementType<TypeParam>());
+  Check<TypeParam>(
+      /*axis=*/2, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 1, 2},
+      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+      {
+          {1, 2, 5, 6, 9, 10, 13, 14}, //
+          {3, 4, 7, 8, 11, 12, 15, 16}, //
+      },
+      getElementType<TypeParam>());
+  Check<TypeParam>(
+      /*axis=*/3, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 2, 1},
+      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+      {
+          {1, 3, 5, 7, 9, 11, 13, 15}, //
+          {2, 4, 6, 8, 10, 12, 14, 16}, //
+      },
+      getElementType<TypeParam>());
+}
+
+// Degenerate case: splitting a 1-D tensor into single-element slices.
+TYPED_TEST(SplitTest, OneDimensional)
+{
+  Check<TypeParam>(
+      /*axis=*/0, /*num_splits=*/8, {8}, {1}, {1, 2, 3, 4, 5, 6, 7, 8},
+      {{1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}}, getElementType<TypeParam>());
+}
+
+// Axis -4 on a rank-4 tensor resolves to axis 0, so this must match the
+// axis-0 case from FourDimensional.
+TYPED_TEST(SplitTest, NegativeAxis)
+{
+  Check<TypeParam>(
+      /*axis=*/-4, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2},
+      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+      {
+          {1, 2, 3, 4, 5, 6, 7, 8}, //
+          {9, 10, 11, 12, 13, 14, 15, 16},
+      },
+      getElementType<TypeParam>());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Squeeze.cpp b/compiler/luci-interpreter/src/kernels/Squeeze.cpp
new file mode 100644
index 000000000..ce43ef789
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Squeeze.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Squeeze.h"
+
+#include "kernels/Utils.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// SQUEEZE kernel: drops size-1 dimensions. With an empty squeeze_dims list it
+// drops every size-1 dim; otherwise only the listed (possibly negative) dims.
+Squeeze::Squeeze(const Tensor *input, Tensor *output, const SqueezeParams &params)
+    : KernelWithParams<SqueezeParams>({input}, {output}, params)
+{
+}
+
+// Computes the squeezed output shape. Data is untouched here; execute() just
+// copies it, since squeezing never reorders elements.
+void Squeeze::configure()
+{
+  int input_num_dims = input()->shape().num_dims();
+  int num_squeeze_dims = params().squeeze_dims.size();
+  // Fixed-size bookkeeping array caps supported rank at 8.
+  assert(input_num_dims <= 8);
+  bool should_squeeze[8] = {false};
+  int num_squeezed_dims = 0;
+  if (num_squeeze_dims == 0)
+  {
+    // No explicit dims: squeeze every dimension of extent 1.
+    for (int idx = 0; idx < input_num_dims; ++idx)
+    {
+      if (input()->shape().dim(idx) == 1)
+      {
+        should_squeeze[idx] = true;
+        ++num_squeezed_dims;
+      }
+    }
+  }
+  else
+  {
+    // Explicit dims: normalize negatives and require each to have extent 1.
+    // Duplicates are tolerated (counted once via the should_squeeze check).
+    for (int idx = 0; idx < num_squeeze_dims; ++idx)
+    {
+      int current = params().squeeze_dims[idx] < 0 ? params().squeeze_dims[idx] + input_num_dims
+                                                   : params().squeeze_dims[idx];
+      assert(current >= 0 && current < input_num_dims && input()->shape().dim(current) == 1);
+      if (!should_squeeze[current])
+        ++num_squeezed_dims;
+      should_squeeze[current] = true;
+    }
+  }
+  // Output shape keeps the surviving dimensions in their original order.
+  Shape output_shape(input_num_dims - num_squeezed_dims);
+  for (int in_idx = 0, out_idx = 0; in_idx < input_num_dims; ++in_idx)
+  {
+    if (!should_squeeze[in_idx])
+    {
+      output_shape.dim(out_idx++) = input()->shape().dim(in_idx);
+    }
+  }
+  output()->resize(output_shape);
+}
+
+// Squeeze is a pure metadata change, so execution is a raw byte copy.
+// NOTE(review): std::memcpy requires <cstring>, which this file does not
+// include directly (only <stdexcept>); presumably it arrives transitively via
+// kernels/Utils.h — confirm, or the build is one header refactor from breaking.
+void Squeeze::execute() const
+{
+  assert(input()->shape().num_elements() == output()->shape().num_elements());
+
+  const auto *input_data = input()->data<void>();
+  auto *output_data = output()->data<void>();
+  std::memcpy(output_data, input_data,
+              getDataTypeSize(input()->element_type()) * input()->shape().num_elements());
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Squeeze.h b/compiler/luci-interpreter/src/kernels/Squeeze.h
new file mode 100644
index 000000000..687af5158
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Squeeze.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_SQUEEZE_H
+#define LUCI_INTERPRETER_KERNELS_SQUEEZE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+// SQUEEZE kernel (single input, single output). 'SqueezeParams' lists the
+// dimensions to drop; an empty list means "drop all size-1 dimensions".
+class Squeeze : public KernelWithParams<SqueezeParams>
+{
+public:
+  Squeeze(const Tensor *input, Tensor *output, const SqueezeParams &params);
+
+  // Accessors for the single input/output wired in the constructor.
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_SQUEEZE_H
diff --git a/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp b/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp
new file mode 100644
index 000000000..3a34284dd
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Squeeze.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+// Shared driver: runs Squeeze with the given squeeze_dims and verifies both
+// the output data (unchanged order) and the squeezed shape.
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape,
+           std::initializer_list<T> input_data, std::initializer_list<T> output_data,
+           DataType element_type, std::vector<int32_t> squeeze_dims)
+{
+  Tensor input_tensor{element_type, input_shape, {}, ""};
+  input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(T));
+  Tensor output_tensor = makeOutputTensor(element_type);
+
+  SqueezeParams params{};
+  for (size_t i = 0; i < squeeze_dims.size(); i++)
+  {
+    params.squeeze_dims.push_back(squeeze_dims.at(i));
+  }
+
+  Squeeze kernel(&input_tensor, &output_tensor, params);
+  kernel.configure();
+  kernel.execute();
+
+  EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+  EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+// Typed fixture: each case runs for float and uint8_t.
+template <typename T> class SqueezeTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_CASE(SqueezeTest, DataTypes);
+
+// Squeezes dims -1 (== 2) and 0 of a {1, 24, 1} tensor, exercising negative
+// dimension indices; the result is a flat {24} tensor with identical data.
+TYPED_TEST(SqueezeTest, TotalTest)
+{
+  Check<TypeParam>(
+      /*input_shape=*/{1, 24, 1}, /*output_shape=*/{24},
+      /*input_data=*/{1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12,
+                      13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
+      /*output_data=*/{1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12,
+                       13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24},
+      getElementType<TypeParam>(), {-1, 0});
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/StridedSlice.cpp b/compiler/luci-interpreter/src/kernels/StridedSlice.cpp
new file mode 100644
index 000000000..679485439
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/StridedSlice.cpp
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/StridedSlice.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+// STRIDED_SLICE kernel: extracts a strided sub-tensor, delegating to the
+// TFLite reference op. ellipsis_mask and new_axis_mask are not supported.
+StridedSlice::StridedSlice(const Tensor *input, const Tensor *begin, const Tensor *end,
+                           const Tensor *strides, Tensor *output, const StridedSliceParams &params)
+    : KernelWithParams<StridedSliceParams>({input, begin, end, strides}, {output}, params)
+{
+}
+
+// Validates inputs, then computes the output shape by resolving each axis's
+// begin/end/stride (honoring begin/end/shrink-axis masks) the same way the
+// TFLite op will at execute time.
+// NOTE(review): reads begin/end/strides tensor data here, which assumes those
+// are constant tensors populated before configure() — confirm callers
+// guarantee this.
+void StridedSlice::configure()
+{
+  assert(begin()->shape().num_dims() == 1);
+  assert(end()->shape().num_dims() == 1);
+  assert(strides()->shape().num_dims() == 1);
+  assert(input()->element_type() == output()->element_type());
+  assert(begin()->element_type() == DataType::S32);
+  assert(end()->element_type() == DataType::S32);
+  assert(strides()->element_type() == DataType::S32);
+  assert(input()->shape().num_dims() <= 4);
+  if (params().ellipsis_mask != 0)
+  {
+    throw std::runtime_error("ellipsis_mask is not implemented yet.");
+  }
+  if (params().new_axis_mask != 0)
+  {
+    throw std::runtime_error("new_axis_mask is not implemented yet.");
+  }
+  // Quantized slicing is a pure copy, so input/output quantization must match.
+  if (input()->element_type() == DataType::U8)
+  {
+    assert(input()->scale() == output()->scale());
+    assert(input()->zero_point() == output()->zero_point());
+  }
+  // NOTE(review): this op_params construction is duplicated verbatim in
+  // execute(); consider extracting a shared helper.
+  tflite::StridedSliceParams op_params{};
+  op_params.start_indices_count = input()->shape().num_dims();
+  op_params.stop_indices_count = input()->shape().num_dims();
+  op_params.strides_count = input()->shape().num_dims();
+
+  for (int i = 0; i < input()->shape().num_dims(); i++)
+  {
+    op_params.start_indices[i] = getTensorData<int32_t>(begin())[i];
+    op_params.stop_indices[i] = getTensorData<int32_t>(end())[i];
+    op_params.strides[i] = getTensorData<int32_t>(strides())[i];
+  }
+  op_params.begin_mask = params().begin_mask;
+  op_params.ellipsis_mask = 0;
+  op_params.end_mask = params().end_mask;
+  op_params.new_axis_mask = 0;
+  op_params.shrink_axis_mask = params().shrink_axis_mask;
+  // Axes are walked from innermost to outermost (idx descending), matching
+  // the TFLite shape-inference helpers; the vector is reversed below.
+  std::vector<int32_t> output_shape_vector;
+  for (int i = 0; i < input()->shape().num_dims(); i++)
+  {
+    int idx = input()->shape().num_dims() - i - 1;
+    int32_t stride = getTensorData<int32_t>(strides())[idx];
+    assert(stride != 0);
+    // NOTE(review): these locals shadow the begin()/end() accessor names.
+    int32_t begin = ::tflite::strided_slice::StartForAxis(op_params, getTensorShape(input()), idx);
+    int32_t end =
+        ::tflite::strided_slice::StopForAxis(op_params, getTensorShape(input()), idx, begin);
+
+    // A shrink axis yields exactly one element and is dropped from the shape.
+    const bool shrink_axis = params().shrink_axis_mask & (1 << idx);
+    if (shrink_axis)
+    {
+      end = begin + 1;
+    }
+
+    // Ceil-divide the index range by the stride; clamp negatives to empty.
+    int32_t dim_shape = std::ceil((end - begin) / static_cast<float>(stride));
+    dim_shape = dim_shape < 0 ? 0 : dim_shape;
+    if (!shrink_axis)
+    {
+      output_shape_vector.push_back(dim_shape);
+    }
+  }
+  // Reverse back into outermost-first order.
+  Shape output_shape = Shape(output_shape_vector.size());
+  for (size_t i = 0; i < output_shape_vector.size(); i++)
+  {
+    output_shape.dim(i) = output_shape_vector[output_shape_vector.size() - i - 1];
+  }
+  output()->resize(output_shape);
+}
+
+// Rebuilds the op parameters from the begin/end/strides tensors and runs the
+// TFLite reference op for FLOAT32 or U8.
+void StridedSlice::execute() const
+{
+  tflite::StridedSliceParams op_params{};
+  op_params.start_indices_count = input()->shape().num_dims();
+  op_params.stop_indices_count = input()->shape().num_dims();
+  op_params.strides_count = input()->shape().num_dims();
+
+  for (int i = 0; i < input()->shape().num_dims(); i++)
+  {
+    op_params.start_indices[i] = getTensorData<int32_t>(begin())[i];
+    op_params.stop_indices[i] = getTensorData<int32_t>(end())[i];
+    op_params.strides[i] = getTensorData<int32_t>(strides())[i];
+  }
+  op_params.begin_mask = params().begin_mask;
+  op_params.ellipsis_mask = 0;
+  op_params.end_mask = params().end_mask;
+  op_params.new_axis_mask = 0;
+  op_params.shrink_axis_mask = params().shrink_axis_mask;
+
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()),
+                                          getTensorData<float>(input()), getTensorShape(output()),
+                                          getTensorData<float>(output()));
+      break;
+    case DataType::U8:
+      tflite::reference_ops::StridedSlice(op_params, getTensorShape(input()),
+                                          getTensorData<uint8_t>(input()), getTensorShape(output()),
+                                          getTensorData<uint8_t>(output()));
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/StridedSlice.h b/compiler/luci-interpreter/src/kernels/StridedSlice.h
new file mode 100644
index 000000000..fc96893a7
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/StridedSlice.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_STRIDEDSLICE_H
+#define LUCI_INTERPRETER_KERNELS_STRIDEDSLICE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class StridedSlice : public KernelWithParams<StridedSliceParams>
+{
+public:
+ StridedSlice(const Tensor *input, const Tensor *begin, const Tensor *end, const Tensor *strides,
+ Tensor *output, const StridedSliceParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *begin() const { return _inputs[1]; }
+ const Tensor *end() const { return _inputs[2]; }
+ const Tensor *strides() const { return _inputs[3]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_STRIDEDSLICE_H
diff --git a/compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp b/compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp
new file mode 100644
index 000000000..5ab06e2ec
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/StridedSlice.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+TEST(StridedSliceTest, Float)
+{
+ Shape input_shape{2, 3, 2};
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ Shape begin_shape{3};
+ std::vector<int32_t> begin_data{0, 0, 0};
+ Shape end_shape{3};
+ std::vector<int32_t> end_data{1, 3, 2};
+ Shape strides_shape{3};
+ std::vector<int32_t> strides_data{1, 1, 1};
+ Tensor input_tensor{DataType::FLOAT32, input_shape, {}, ""};
+ Tensor begin_tensor{DataType::S32, begin_shape, {}, ""};
+ Tensor end_tensor{DataType::S32, end_shape, {}, ""};
+ Tensor strides_tensor{DataType::S32, strides_shape, {}, ""};
+ Tensor output_tensor = makeOutputTensor(DataType::FLOAT32);
+
+ input_tensor.writeData(input_data.data(), input_data.size() * sizeof(float));
+ begin_tensor.writeData(begin_data.data(), begin_data.size() * sizeof(int32_t));
+ end_tensor.writeData(end_data.data(), end_data.size() * sizeof(int32_t));
+ strides_tensor.writeData(strides_data.data(), strides_data.size() * sizeof(int32_t));
+
+ StridedSliceParams params{};
+ params.begin_mask = 0;
+ params.end_mask = 0;
+ params.ellipsis_mask = 0;
+ params.new_axis_mask = 0;
+ params.shrink_axis_mask = 1;
+
+ StridedSlice kernel(&input_tensor, &begin_tensor, &end_tensor, &strides_tensor, &output_tensor,
+ params);
+ kernel.configure();
+ kernel.execute();
+
+ std::vector<int32_t> output_shape{3, 2};
+ std::vector<float> output_data{1, 2, 3, 4, 5, 6};
+ EXPECT_THAT(extractTensorData<float>(output_tensor),
+ ElementsAreArray(ArrayFloatNear(output_data)));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+TEST(StridedSliceTest, Uint8)
+{
+ Shape input_shape{2, 3, 2};
+ std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ std::vector<uint8_t> quant_input_data = quantize<uint8_t>(input_data, 1.0f, 0);
+ Shape begin_shape{3};
+ std::vector<int32_t> begin_data{0, 0, 0};
+ Shape end_shape{3};
+ std::vector<int32_t> end_data{1, 3, 2};
+ Shape strides_shape{3};
+ std::vector<int32_t> strides_data{1, 1, 1};
+ Tensor input_tensor{DataType::U8, input_shape, {{1.0f}, {0}}, ""};
+ Tensor begin_tensor{DataType::S32, begin_shape, {}, ""};
+ Tensor end_tensor{DataType::S32, end_shape, {}, ""};
+ Tensor strides_tensor{DataType::S32, strides_shape, {}, ""};
+ Tensor output_tensor = makeOutputTensor(DataType::U8, 1.0f, 0);
+
+ input_tensor.writeData(quant_input_data.data(), quant_input_data.size() * sizeof(uint8_t));
+ begin_tensor.writeData(begin_data.data(), begin_data.size() * sizeof(int32_t));
+ end_tensor.writeData(end_data.data(), end_data.size() * sizeof(int32_t));
+ strides_tensor.writeData(strides_data.data(), strides_data.size() * sizeof(int32_t));
+
+ StridedSliceParams params{};
+ params.begin_mask = 0;
+ params.end_mask = 0;
+ params.ellipsis_mask = 0;
+ params.new_axis_mask = 0;
+ params.shrink_axis_mask = 1;
+
+ StridedSlice kernel(&input_tensor, &begin_tensor, &end_tensor, &strides_tensor, &output_tensor,
+ params);
+ kernel.configure();
+ kernel.execute();
+
+ std::vector<int32_t> output_shape{3, 2};
+ std::vector<float> output_data{1, 2, 3, 4, 5, 6};
+ EXPECT_THAT(dequantize(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(),
+ output_tensor.zero_point()),
+ ElementsAreArray(ArrayFloatNear(output_data)));
+ EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape));
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/TestUtils.cpp b/compiler/luci-interpreter/src/kernels/TestUtils.cpp
new file mode 100644
index 000000000..2c8a6ae78
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/TestUtils.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace testing
+{
+
+using ::testing::FloatNear;
+using ::testing::Matcher;
+
+Tensor makeOutputTensor(DataType element_type) { return Tensor(element_type, {}, {}, ""); }
+
+Tensor makeOutputTensor(DataType element_type, float scale, int32_t zero_point)
+{
+ return Tensor(element_type, {}, {{scale}, {zero_point}}, "");
+}
+
+std::vector<Matcher<float>> ArrayFloatNear(const std::vector<float> &values, float max_abs_error)
+{
+ std::vector<Matcher<float>> matchers;
+ matchers.reserve(values.size());
+ for (const float v : values)
+ {
+ matchers.emplace_back(FloatNear(v, max_abs_error));
+ }
+ return matchers;
+}
+
+std::vector<int32_t> extractTensorShape(const Tensor &tensor)
+{
+ std::vector<int32_t> result;
+ int dims = tensor.shape().num_dims();
+ for (int i = 0; i < dims; i++)
+ {
+ result.push_back(tensor.shape().dim(i));
+ }
+ return result;
+}
+
+} // namespace testing
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/TestUtils.h b/compiler/luci-interpreter/src/kernels/TestUtils.h
new file mode 100644
index 000000000..5311a1949
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/TestUtils.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_TESTUTILS_H
+#define LUCI_INTERPRETER_KERNELS_TESTUTILS_H
+
+#include "luci_interpreter/core/Tensor.h"
+
+#include <type_traits>
+
+#include <gtest/gtest.h>
+#include <gmock/gmock.h>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace testing
+{
+
+template <DataType DT>
+Tensor makeInputTensor(const Shape &shape, const std::vector<typename DataTypeImpl<DT>::Type> &data)
+{
+ Tensor tensor(DT, shape, {}, "");
+ tensor.writeData(data.data(), data.size() * sizeof(typename DataTypeImpl<DT>::Type));
+ return tensor;
+}
+
+Tensor makeOutputTensor(DataType element_type);
+Tensor makeOutputTensor(DataType element_type, float scale, int32_t zero_point);
+
+std::vector<int32_t> extractTensorShape(const Tensor &tensor);
+
+// Returns the corresponding DataType given the type T.
+template <typename T> constexpr DataType getElementType()
+{
+ if (std::is_same<T, float>::value)
+ return DataType::FLOAT32;
+ if (std::is_same<T, uint8_t>::value)
+ return DataType::U8;
+ if (std::is_same<T, int32_t>::value)
+ return DataType::S32;
+ if (std::is_same<T, int64_t>::value)
+ return DataType::S64;
+ return DataType::Unknown;
+}
+
+template <typename T> std::vector<T> extractTensorData(const Tensor &tensor)
+{
+ const auto *data_ptr = tensor.data<T>();
+ return std::vector<T>(data_ptr, data_ptr + tensor.shape().num_elements());
+}
+
+std::vector<::testing::Matcher<float>> ArrayFloatNear(const std::vector<float> &values,
+ float max_abs_error = 1.0e-5f);
+
+template <typename T>
+inline std::vector<T> quantize(const std::vector<float> &data, float scale, int32_t zero_point)
+{
+ assert(!std::is_floating_point<T>::value);
+ std::vector<T> q;
+ for (const auto &f : data)
+ {
+ q.push_back(static_cast<T>(std::max<float>(
+ std::numeric_limits<T>::lowest(),
+ std::min<float>(std::numeric_limits<T>::max(), std::round(zero_point + (f / scale))))));
+ }
+ return q;
+}
+
+template <typename T>
+inline std::vector<float> dequantize(const std::vector<T> &data, float scale, int32_t zero_point)
+{
+ assert(!std::is_floating_point<T>::value);
+ std::vector<float> f;
+ for (const T &q : data)
+ {
+ f.push_back(scale * (q - zero_point));
+ }
+ return f;
+}
+
+template <typename T> std::pair<float, int32_t> quantizationParams(float f_min, float f_max)
+{
+ if (std::is_floating_point<T>::value)
+ {
+ return {1.0f, 0};
+ }
+ int32_t zero_point = 0;
+ double scale = 0;
+ const T qmin = std::numeric_limits<T>::lowest();
+ const T qmax = std::numeric_limits<T>::max();
+ const double qmin_double = qmin;
+ const double qmax_double = qmax;
+ // 0 should always be a representable value. Let's assume that the initial
+ // min,max range contains 0.
+ assert(f_max >= 0);
+ assert(f_min <= 0);
+ if (f_min == f_max)
+ {
+ // Special case where the min,max range is a point. Should be {0}.
+ assert(f_max == 0);
+ assert(f_min == 0);
+ return {scale, zero_point};
+ }
+
+ // General case.
+ //
+ // First determine the scale.
+ scale = (f_max - f_min) / (qmax_double - qmin_double);
+
+ // Zero-point computation.
+ // First the initial floating-point computation. The zero-point can be
+ // determined from solving an affine equation for any known pair
+ // (real value, corresponding quantized value).
+ // We know two such pairs: (rmin, qmin) and (rmax, qmax).
+ // The arithmetic error on the zero point computed from either pair
+ // will be roughly machine_epsilon * (sum of absolute values of terms)
+ // so we want to use the variant that adds the smaller terms.
+ const double zero_point_from_min = qmin_double - f_min / scale;
+ const double zero_point_from_max = qmax_double - f_max / scale;
+
+ const double zero_point_from_min_error = std::abs(qmin_double) + std::abs(f_min / scale);
+
+ const double zero_point_from_max_error = std::abs(qmax_double) + std::abs(f_max / scale);
+
+ const double zero_point_double = zero_point_from_min_error < zero_point_from_max_error
+ ? zero_point_from_min
+ : zero_point_from_max;
+
+ // Now we need to nudge the zero point to be an integer
+ // (our zero points are integer, and this is motivated by the requirement
+ // to be able to represent the real value "0" exactly as a quantized value,
+ // which is required in multiple places, for example in Im2col with SAME
+ // padding).
+
+ T nudged_zero_point = 0;
+ if (zero_point_double < qmin_double)
+ {
+ nudged_zero_point = qmin;
+ }
+ else if (zero_point_double > qmax_double)
+ {
+ nudged_zero_point = qmax;
+ }
+ else
+ {
+ nudged_zero_point = static_cast<T>(std::round(zero_point_double));
+ }
+
+  // The zero point should always be in the range of quantized values,
+  // [qmin, qmax].
+ assert(qmax >= nudged_zero_point);
+ assert(qmin <= nudged_zero_point);
+ zero_point = nudged_zero_point;
+ // finally, return the values
+ return {static_cast<float>(scale), zero_point};
+}
+
+inline float getTolerance(float min, float max, int quantize_steps)
+{
+ return ((max - min) / quantize_steps);
+}
+
+} // namespace testing
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_TESTUTILS_H
diff --git a/compiler/luci-interpreter/src/kernels/Transpose.cpp b/compiler/luci-interpreter/src/kernels/Transpose.cpp
new file mode 100644
index 000000000..8265d9937
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Transpose.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Transpose.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Transpose::Transpose(const Tensor *input, const Tensor *perm, Tensor *output)
+ : Kernel({input, perm}, {output})
+{
+}
+
+void Transpose::configure()
+{
+ // Transpose op only supports 1D-4D input arrays.
+ int dims = input()->shape().num_dims();
+ const int *perm_data = getTensorData<int32_t>(perm());
+
+ assert(input()->shape().num_dims() <= 4);
+ assert(input()->element_type() == output()->element_type());
+
+ assert(perm()->shape().num_dims() == 1);
+ assert(perm()->shape().dim(0) == dims);
+
+ Shape output_shape(dims);
+ for (int i = 0; i < dims; i++)
+ {
+ assert(perm_data[i] < dims && perm_data[i] >= 0);
+ output_shape.dim(i) = input()->shape().dim(perm_data[i]);
+ }
+
+ output()->resize(output_shape);
+}
+
+void Transpose::execute() const
+{
+ tflite::TransposeParams params{};
+ const int *perm_data = getTensorData<int32_t>(perm());
+ const int size = perm()->shape().dim(0);
+ params.perm_count = size;
+ for (int i = 0; i < size; i++)
+ params.perm[i] = perm_data[i];
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ tflite::reference_ops::Transpose(params, getTensorShape(input()),
+ getTensorData<float>(input()), getTensorShape(output()),
+ getTensorData<float>(output()));
+ break;
+ case DataType::U8:
+ tflite::reference_ops::Transpose(params, getTensorShape(input()),
+ getTensorData<uint8_t>(input()), getTensorShape(output()),
+ getTensorData<uint8_t>(output()));
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Transpose.h b/compiler/luci-interpreter/src/kernels/Transpose.h
new file mode 100644
index 000000000..d6f89c352
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Transpose.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_TRANSPOSE_H
+#define LUCI_INTERPRETER_KERNELS_TRANSPOSE_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Transpose : public Kernel
+{
+public:
+ Transpose(const Tensor *input, const Tensor *perm, Tensor *output);
+
+ const Tensor *input() const { return _inputs[0]; }
+ const Tensor *perm() const { return _inputs[1]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_TRANSPOSE_H
diff --git a/compiler/luci-interpreter/src/kernels/Transpose.test.cpp b/compiler/luci-interpreter/src/kernels/Transpose.test.cpp
new file mode 100644
index 000000000..87e6e2a00
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Transpose.test.cpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Transpose.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> perm_shape,
+ std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data,
+ std::initializer_list<int32_t> perm_data, std::initializer_list<T> output_data,
+ DataType element_type)
+{
+ Tensor input_tensor{element_type, input_shape, {}, ""};
+ input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(T));
+
+ Tensor perm_tensor{DataType::S32, perm_shape, {}, ""};
+ perm_tensor.writeData(perm_data.begin(), perm_data.size() * sizeof(int32_t));
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ Transpose kernel(&input_tensor, &perm_tensor, &output_tensor);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+}
+
+template <typename T> class TransposeTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_CASE(TransposeTest, DataTypes);
+
+TYPED_TEST(TransposeTest, Small3D)
+{
+ Check<TypeParam>(/*input_shape=*/{2, 3, 4}, /*perm_shape=*/{3}, /*output_shape=*/{4, 2, 3},
+ /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+ 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23},
+ /*perm_data=*/{2, 0, 1},
+ /*output_data=*/{0, 4, 8, 12, 16, 20, 1, 5, 9, 13, 17, 21,
+ 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23},
+ getElementType<TypeParam>());
+}
+
+TYPED_TEST(TransposeTest, Large4D)
+{
+ Check<TypeParam>(
+ /*input_shape=*/{2, 3, 4, 5}, /*perm_shape=*/{4}, /*output_shape=*/{4, 2, 3, 5},
+ /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119},
+ /*perm_data=*/{2, 0, 1, 3},
+ /*output_data=*/{0, 1, 2, 3, 4, 20, 21, 22, 23, 24, 40, 41, 42, 43, 44,
+ 60, 61, 62, 63, 64, 80, 81, 82, 83, 84, 100, 101, 102, 103, 104,
+ 5, 6, 7, 8, 9, 25, 26, 27, 28, 29, 45, 46, 47, 48, 49,
+ 65, 66, 67, 68, 69, 85, 86, 87, 88, 89, 105, 106, 107, 108, 109,
+ 10, 11, 12, 13, 14, 30, 31, 32, 33, 34, 50, 51, 52, 53, 54,
+ 70, 71, 72, 73, 74, 90, 91, 92, 93, 94, 110, 111, 112, 113, 114,
+ 15, 16, 17, 18, 19, 35, 36, 37, 38, 39, 55, 56, 57, 58, 59,
+ 75, 76, 77, 78, 79, 95, 96, 97, 98, 99, 115, 116, 117, 118, 119},
+ getElementType<TypeParam>());
+}
+
+TYPED_TEST(TransposeTest, Large2D)
+{
+ Check<TypeParam>(
+ /*input_shape=*/{10, 12}, /*perm_shape=*/{2}, /*output_shape=*/{12, 10},
+ /*input_data=*/{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
+ 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119},
+ /*perm_data=*/{1, 0},
+ /*output_data=*/{0, 12, 24, 36, 48, 60, 72, 84, 96, 108, 1, 13, 25, 37, 49,
+ 61, 73, 85, 97, 109, 2, 14, 26, 38, 50, 62, 74, 86, 98, 110,
+ 3, 15, 27, 39, 51, 63, 75, 87, 99, 111, 4, 16, 28, 40, 52,
+ 64, 76, 88, 100, 112, 5, 17, 29, 41, 53, 65, 77, 89, 101, 113,
+ 6, 18, 30, 42, 54, 66, 78, 90, 102, 114, 7, 19, 31, 43, 55,
+ 67, 79, 91, 103, 115, 8, 20, 32, 44, 56, 68, 80, 92, 104, 116,
+ 9, 21, 33, 45, 57, 69, 81, 93, 105, 117, 10, 22, 34, 46, 58,
+ 70, 82, 94, 106, 118, 11, 23, 35, 47, 59, 71, 83, 95, 107, 119},
+ getElementType<TypeParam>());
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.cpp b/compiler/luci-interpreter/src/kernels/TransposeConv.cpp
new file mode 100644
index 000000000..46380e2fa
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/TransposeConv.cpp
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TransposeConv.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+TransposeConv::TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
+ Tensor *output, const TransposeConvParams &params)
+ : KernelWithParams<TransposeConvParams>({output_shape, filter, input}, {output}, params)
+{
+}
+
+void TransposeConv::configure()
+{
+ assert(output_shape()->shape().num_dims() == 1);
+ assert(input()->shape().num_dims() == 4);
+ assert(filter()->shape().num_dims() == 4);
+ assert(input()->element_type() == DataType::FLOAT32 || input()->element_type() == DataType::U8);
+ assert(input()->element_type() == output()->element_type());
+ assert(input()->shape().dim(3) == filter()->shape().dim(3));
+ if (input()->element_type() == DataType::U8)
+ {
+ _scratch_tensor =
+ std::make_unique<Tensor>(DataType::S32, output()->shape(), AffineQuantization{}, "");
+ double real_multiplier = 0.0;
+ const double input_product_scale = input()->scale() * filter()->scale();
+ assert(input_product_scale >= 0);
+ real_multiplier = input_product_scale / output()->scale();
+ int exponent;
+ quantizeMultiplier(real_multiplier, &_output_multiplier, &exponent);
+ _output_shift = -exponent;
+ }
+
+ const int num_dims = output_shape()->shape().dim(0);
+ Shape out_shape(num_dims);
+ const auto *shape_data = getTensorData<int32_t>(output_shape());
+ for (int i = 0; i < num_dims; i++)
+ out_shape.dim(i) = shape_data[i];
+ output()->resize(out_shape);
+}
+
+void TransposeConv::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ evalFloat();
+ break;
+ case DataType::U8:
+ evalQuantized();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+void TransposeConv::evalFloat() const
+{
+ const int width = output()->shape().dim(2);
+ const int height = output()->shape().dim(1);
+
+ const int filter_width = filter()->shape().dim(2);
+ const int filter_height = filter()->shape().dim(1);
+
+ int unused_output_height, unused_output_width;
+ unused_output_width =
+ computeOutputSize(params().padding, width, filter_width, params().stride_width, 1);
+ unused_output_height =
+ computeOutputSize(params().padding, height, filter_height, params().stride_height, 1);
+ int32_t offset = 0;
+ tflite::ConvParams op_params{};
+ op_params.padding_type = tflite::PaddingType::kSame;
+ op_params.padding_values.height = computePaddingWithOffset(
+ params().stride_height, 1, height, filter_height, unused_output_height, &offset);
+ op_params.padding_values.height_offset = offset;
+ op_params.padding_values.width = computePaddingWithOffset(
+ params().stride_width, 1, width, filter_width, unused_output_width, &offset);
+ op_params.padding_values.width_offset = offset;
+ op_params.stride_height = params().stride_height;
+ op_params.stride_width = params().stride_width;
+ op_params.output_multiplier = _output_multiplier;
+ tflite::reference_ops::TransposeConv(
+ op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()),
+ getTensorData<float>(filter()), getTensorShape(output()), getTensorData<float>(output()),
+ tflite::RuntimeShape(), (float *)nullptr);
+}
+
+void TransposeConv::evalQuantized() const
+{
+ int32_t input_offset = -input()->zero_point();
+ int32_t filter_offset = -filter()->zero_point();
+  int32_t output_offset = output()->zero_point();
+ const int width = output()->shape().dim(2);
+ const int height = output()->shape().dim(1);
+
+ const int filter_width = filter()->shape().dim(2);
+ const int filter_height = filter()->shape().dim(1);
+
+ int unused_output_height, unused_output_width;
+ unused_output_width =
+ computeOutputSize(params().padding, width, filter_width, params().stride_width, 1);
+ unused_output_height =
+ computeOutputSize(params().padding, height, filter_height, params().stride_height, 1);
+ int32_t offset = 0;
+ tflite::ConvParams op_params{};
+ op_params.padding_type = tflite::PaddingType::kSame;
+ op_params.padding_values.height = computePaddingWithOffset(
+ params().stride_height, 1, height, filter_height, unused_output_height, &offset);
+ op_params.padding_values.width = computePaddingWithOffset(
+ params().stride_width, 1, width, filter_width, unused_output_width, &offset);
+ op_params.stride_height = params().stride_height;
+ op_params.stride_width = params().stride_width;
+ op_params.input_offset = input_offset;
+ op_params.output_offset = output_offset;
+ op_params.weights_offset = filter_offset;
+ op_params.output_multiplier = _output_multiplier;
+ op_params.output_shift = -_output_shift;
+ op_params.quantized_activation_min = std::numeric_limits<uint8_t>::min();
+ op_params.quantized_activation_max = std::numeric_limits<uint8_t>::max();
+
+ tflite::reference_ops::TransposeConv(
+ op_params, getTensorShape(input()), getTensorData<uint8>(input()), getTensorShape(filter()),
+ getTensorData<uint8>(filter()), getTensorShape(output()), getTensorData<uint8>(output()),
+ tflite::RuntimeShape(), (uint8 *)nullptr, getTensorData<int32_t>(_scratch_tensor.get()));
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.h b/compiler/luci-interpreter/src/kernels/TransposeConv.h
new file mode 100644
index 000000000..d73e939b7
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/TransposeConv.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_TRANSPOSECONV_H
+#define LUCI_INTERPRETER_KERNELS_TRANSPOSECONV_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class TransposeConv : public KernelWithParams<TransposeConvParams>
+{
+public:
+ TransposeConv(const Tensor *output_shape, const Tensor *filter, const Tensor *input,
+ Tensor *output, const TransposeConvParams &params);
+
+ const Tensor *output_shape() const { return _inputs[0]; }
+ const Tensor *filter() const { return _inputs[1]; }
+ const Tensor *input() const { return _inputs[2]; }
+ Tensor *output() const { return _outputs[0]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ void evalFloat() const;
+ void evalQuantized() const;
+
+private:
+ std::unique_ptr<Tensor> _scratch_tensor;
+
+ // The scaling factor from input to output (aka the 'real multiplier') can
+ // be represented as a fixed point multiplier plus a left shift.
+ int32_t _output_multiplier = 0;
+ int _output_shift = 0;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_TRANSPOSECONV_H
diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp b/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp
new file mode 100644
index 000000000..3386d3683
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/TransposeConv.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(std::initializer_list<int32_t> output_shape_shape,
+ std::initializer_list<int32_t> weight_shape,
+ std::initializer_list<int32_t> input_data_shape,
+ std::initializer_list<int32_t> output_shape,
+ std::initializer_list<int32_t> output_shape_data, std::initializer_list<T> weight_data,
+ std::initializer_list<T> input_data_data, std::initializer_list<T> output_data,
+ luci::Padding padding, int32_t stride_height, int32_t stride_width,
+ DataType element_type)
+{
+ Tensor output_shape_tensor{element_type, output_shape_shape, {}, ""};
+ output_shape_tensor.writeData(output_shape_data.begin(), output_shape_data.size() * sizeof(T));
+ Tensor weight_tensor{element_type, weight_shape, {}, ""};
+ weight_tensor.writeData(weight_data.begin(), weight_data.size() * sizeof(T));
+ Tensor input_data_tensor{element_type, input_data_shape, {}, ""};
+ input_data_tensor.writeData(input_data_data.begin(), input_data_data.size() * sizeof(T));
+
+ Tensor output_tensor = makeOutputTensor(element_type);
+
+ TransposeConvParams params{};
+ params.padding = padding;
+ params.stride_height = stride_height;
+ params.stride_width = stride_width;
+
+ TransposeConv kernel(&output_shape_tensor, &weight_tensor, &input_data_tensor, &output_tensor,
+ params);
+ kernel.configure();
+ kernel.execute();
+
+ EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data));
+}
+
+TEST(TransposeConvTest, FloatSimple)
+{
+ Check<float>(
+ /*outputShape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 1}, /*input_shape=*/{1, 4, 4, 1},
+ /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1},
+ /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ /*output_data=*/{29, 62, 83, 75, 99, 192, 237, 198, 207, 372, 417, 330, 263, 446, 485, 365},
+ /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1,
+ getElementType<float>());
+}
+
+TEST(TransposeConvTest, FloatTwoFiltersTest)
+{
+ Check<float>(
+ /*outputShape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 2}, /*input_shape=*/{1, 4, 4, 2},
+ /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1},
+ /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32},
+ /*output_data=*/{184, 412, 568, 528, 678, 1347, 1689, 1434, 1494, 2715, 3057, 2442, 1968,
+ 3352, 3652, 2760},
+ /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1,
+ getElementType<float>());
+}
+
+TEST(TransposeConvTest, Uint8Simple)
+{
+ // TODO
+ // Implement GetDequantizedOutput Function.
+ // Create Test for Uint8 Case
+}
+TEST(TransposeConvTest, Uint8FiltersTest)
+{
+ // TODO
+ // Implement GetDequantizedOutput Function.
+ // Create Test for Uint8 Case
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Unpack.cpp b/compiler/luci-interpreter/src/kernels/Unpack.cpp
new file mode 100644
index 000000000..834b79926
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Unpack.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Unpack.h"
+
+#include "kernels/Utils.h"
+
+#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+namespace kernels
+{
+
+Unpack::Unpack(const Tensor *input, std::vector<Tensor *> outputs, const UnpackParams &params)
+ : KernelWithParams<UnpackParams>({input}, std::move(outputs), params)
+{
+}
+
+void Unpack::configure()
+{
+ const Shape &input_shape = input()->shape();
+
+ int axis = _params.axis;
+ if (axis < 0)
+ axis += input()->shape().num_dims();
+ assert(axis >= 0 && axis < input_shape.num_dims());
+
+ Shape output_shape(input_shape.num_dims() - 1);
+ int out_index = 0;
+ for (int in_index = 0; in_index < input_shape.num_dims(); ++in_index)
+ {
+ if (in_index != axis)
+ output_shape.dim(out_index++) = input_shape.dim(in_index);
+ }
+
+ for (Tensor *output : _outputs)
+ {
+ assert(output->element_type() == input()->element_type());
+ output->resize(output_shape);
+ }
+}
+
+template <typename T> void Unpack::executeImpl() const
+{
+ tflite::UnpackParams params{};
+ params.axis = _params.axis;
+ params.num_split = _outputs.size();
+ VectorOfTensors<T, false> all_outputs(_outputs);
+ tflite::reference_ops::Unpack<T>(params, getTensorShape(input()), getTensorData<T>(input()),
+ **all_outputs.shapes(), all_outputs.data());
+}
+
+void Unpack::execute() const
+{
+ switch (input()->element_type())
+ {
+ case DataType::FLOAT32:
+ return executeImpl<float>();
+ case DataType::U8:
+ return executeImpl<uint8_t>();
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Unpack.h b/compiler/luci-interpreter/src/kernels/Unpack.h
new file mode 100644
index 000000000..f4a44ecad
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Unpack.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_UNPACK_H
+#define LUCI_INTERPRETER_KERNELS_UNPACK_H
+
+#include "core/Kernel.h"
+#include "core/KernelParams.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class Unpack : public KernelWithParams<UnpackParams>
+{
+public:
+ Unpack(const Tensor *input, std::vector<Tensor *> outputs, const UnpackParams &params);
+
+ const Tensor *input() const { return _inputs[0]; }
+ Tensor *output(int index) const { return _outputs[index]; }
+
+ void configure() override;
+ void execute() const override;
+
+private:
+ template <typename T> void executeImpl() const;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_UNPACK_H
diff --git a/compiler/luci-interpreter/src/kernels/Unpack.test.cpp b/compiler/luci-interpreter/src/kernels/Unpack.test.cpp
new file mode 100644
index 000000000..f70c5847a
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Unpack.test.cpp
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Unpack.h"
+#include "kernels/TestUtils.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+namespace
+{
+
+using namespace testing;
+
+template <typename T>
+void Check(int axis, Shape input_shape, std::initializer_list<T> input_data,
+ const std::vector<std::initializer_list<int32_t>> &exp_output_shape,
+ std::vector<std::initializer_list<T>> exp_output_data)
+{
+ constexpr DataType element_type = getElementType<T>();
+ const int num_outputs = input_shape.dim(axis < 0 ? axis + input_shape.num_dims() : axis);
+
+ Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data);
+ std::vector<Tensor> output_tensors;
+ output_tensors.reserve(num_outputs);
+ for (int i = 0; i < num_outputs; ++i)
+ {
+ output_tensors.push_back(makeOutputTensor(element_type));
+ }
+
+ std::vector<Tensor *> output_tensor_ptrs(num_outputs);
+ for (int i = 0; i < num_outputs; ++i)
+ {
+ output_tensor_ptrs[i] = &output_tensors[i];
+ }
+
+ UnpackParams params{};
+ params.axis = axis;
+
+ Unpack kernel(&input_tensor, std::move(output_tensor_ptrs), params);
+ kernel.configure();
+ kernel.execute();
+
+ for (int i = 0; i < num_outputs; ++i)
+ {
+ EXPECT_THAT(extractTensorData<T>(output_tensors[i]),
+ ::testing::ElementsAreArray(exp_output_data[i]));
+ }
+}
+
+template <typename T> class UnpackTest : public ::testing::Test
+{
+};
+
+using DataTypes = ::testing::Types<float, uint8_t>;
+TYPED_TEST_CASE(UnpackTest, DataTypes);
+
+TYPED_TEST(UnpackTest, ThreeOutputs)
+{
+ Check<TypeParam>(/*axis=*/0, /*input_shape=*/{3, 2},
+ /*input_data=*/{1, 2, 3, 4, 5, 6},
+ /*exp_output_shape=*/{{2}, {2}, {2}},
+ /*exp_output_data=*/{{1, 2}, {3, 4}, {5, 6}});
+}
+
+TYPED_TEST(UnpackTest, ThreeOutputsAxisOne)
+{
+ Check<TypeParam>(/*axis=*/1, /*input_shape=*/{3, 2},
+ /*input_data=*/{1, 2, 3, 4, 5, 6},
+ /*exp_output_shape=*/{{3}, {3}},
+ /*exp_output_data=*/{{1, 3, 5}, {2, 4, 6}});
+}
+
+TYPED_TEST(UnpackTest, ThreeOutputsNegativeAxisOne)
+{
+ Check<TypeParam>(/*axis=*/-1, /*input_shape=*/{3, 2},
+ /*input_data=*/{1, 2, 3, 4, 5, 6},
+ /*exp_output_shape=*/{{3}, {3}},
+ /*exp_output_data=*/{{1, 3, 5}, {2, 4, 6}});
+}
+
+TYPED_TEST(UnpackTest, ThreeOutputsNegativeAxisTwo)
+{
+ Check<TypeParam>(/*axis=*/-2, /*input_shape=*/{3, 2},
+ /*input_data=*/{1, 2, 3, 4, 5, 6},
+ /*exp_output_shape=*/{{2}, {2}, {2}},
+ /*exp_output_data=*/{{1, 2}, {3, 4}, {5, 6}});
+}
+
+TYPED_TEST(UnpackTest, OneOutput)
+{
+ Check<TypeParam>(/*axis=*/0, /*input_shape=*/{1, 6},
+ /*input_data=*/{1, 2, 3, 4, 5, 6},
+ /*exp_output_shape=*/{{6}},
+ /*exp_output_data=*/{{1, 2, 3, 4, 5, 6}});
+}
+
+TYPED_TEST(UnpackTest, ThreeDimensionsTwoOutputs)
+{
+ Check<TypeParam>(/*axis=*/2, /*input_shape=*/{2, 2, 2},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8},
+ /*exp_output_shape=*/{{2, 2}, {2, 2}},
+ /*exp_output_data=*/{{1, 3, 5, 7}, {2, 4, 6, 8}});
+}
+
+TYPED_TEST(UnpackTest, FiveDimensionsTwoOutputs)
+{
+ Check<TypeParam>(
+ /*axis=*/2, /*input_shape=*/{2, 2, 2, 2, 1},
+ /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+ /*exp_output_shape=*/{{2, 2, 2, 1}, {2, 2, 2, 1}},
+ /*exp_output_data=*/
+ {{1, 2, 5, 6, 9, 10, 13, 14}, {3, 4, 7, 8, 11, 12, 15, 16}});
+}
+
+TYPED_TEST(UnpackTest, VectorToScalar)
+{
+ Check<TypeParam>(/*axis=*/0, /*input_shape=*/{5},
+ /*input_data=*/{1, 2, 3, 4, 5},
+ /*exp_output_shape=*/{{}, {}, {}, {}, {}},
+ /*exp_output_data=*/{{1}, {2}, {3}, {4}, {5}});
+}
+
+} // namespace
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Utils.cpp b/compiler/luci-interpreter/src/kernels/Utils.cpp
new file mode 100644
index 000000000..b9e7738a9
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Utils.cpp
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernels/Utils.h"
+
+#include <cassert>
+#include <cmath>
+#include <limits>
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+void calculateActivationRange(Activation activation, float *activation_min, float *activation_max)
+{
+ switch (activation)
+ {
+ case Activation::NONE:
+ *activation_min = std::numeric_limits<float>::lowest();
+ *activation_max = std::numeric_limits<float>::max();
+ break;
+ case Activation::RELU:
+ *activation_min = 0;
+ *activation_max = std::numeric_limits<float>::max();
+ break;
+ case Activation::RELU_N1_TO_1:
+ *activation_min = -1;
+ *activation_max = 1;
+ break;
+ case Activation::RELU6:
+ *activation_min = 0;
+ *activation_max = 6;
+ break;
+ default:
+ throw std::runtime_error("Unsupported activation.");
+ }
+}
+
+static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t qmin, int32_t qmax,
+ const Tensor *output, int32_t *activation_min,
+ int32_t *activation_max)
+{
+ const float scale = output->scale();
+ const int32_t zero_point = output->zero_point();
+
+ auto quantize = [scale, zero_point](float x) {
+ return zero_point + static_cast<int32_t>(std::round(x / scale));
+ };
+
+ switch (activation)
+ {
+ case Activation::NONE:
+ *activation_min = qmin;
+ *activation_max = qmax;
+ break;
+ case Activation::RELU:
+ *activation_min = std::max(qmin, quantize(0.0f));
+ *activation_max = qmax;
+ break;
+ case Activation::RELU_N1_TO_1:
+ *activation_min = std::max(qmin, quantize(-1.0f));
+ *activation_max = std::min(qmax, quantize(1.0f));
+ break;
+ case Activation::RELU6:
+ *activation_min = std::max(qmin, quantize(0.0f));
+ *activation_max = std::min(qmax, quantize(6.0f));
+ break;
+ default:
+ throw std::runtime_error("Unsupported activation.");
+ }
+}
+
+void calculateActivationRangeQuantized(Activation activation, const Tensor *output,
+ int32_t *activation_min, int32_t *activation_max)
+{
+ int32_t qmin{};
+ int32_t qmax{};
+ switch (output->element_type())
+ {
+ case DataType::U8:
+ qmin = std::numeric_limits<uint8_t>::min();
+ qmax = std::numeric_limits<uint8_t>::max();
+ break;
+ case DataType::S8:
+ qmin = std::numeric_limits<int8_t>::min();
+ qmax = std::numeric_limits<int8_t>::max();
+ break;
+ case DataType::S16:
+ qmin = std::numeric_limits<int16_t>::min();
+ qmax = std::numeric_limits<int16_t>::max();
+ break;
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+
+ calculateActivationRangeQuantizedImpl(activation, qmin, qmax, output, activation_min,
+ activation_max);
+}
+
+void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift)
+{
+ if (double_multiplier == 0.0)
+ {
+ *quantized_multiplier = 0;
+ *shift = 0;
+ return;
+ }
+
+ const double q = std::frexp(double_multiplier, shift);
+ auto q_fixed = static_cast<int64_t>(std::round(q * (INT64_C(1) << 31)));
+
+ if (q_fixed == (INT64_C(1) << 31))
+ {
+ q_fixed /= 2;
+ ++*shift;
+ }
+ assert(q_fixed <= std::numeric_limits<int32_t>::max());
+ // A shift amount smaller than -31 would cause all bits to be shifted out
+ // and thus all results would be zero. We implement that instead with
+ // q_fixed==0, so as to avoid hitting issues with right-shift
+ // operations with shift amounts greater than 31. Note that this happens
+ // roughly when abs(double_multiplier) < 2^-31 and the present handling means
+ // that we're effectively flushing tiny double_multiplier's to zero.
+ // We could conceivably handle values in the range (roughly) [32, 63]
+  // as 'denormals' i.e. (shift==0, q_fixed < 2^30). From that point of view
+ // the present handling is just doing 'flush denormals to zero'. We could
+ // reconsider and actually generate nonzero denormals if a need arises.
+ if (*shift < -31)
+ {
+ *shift = 0;
+ q_fixed = 0;
+ }
+ *quantized_multiplier = static_cast<int32_t>(q_fixed);
+}
+
+void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
+ int *left_shift)
+{
+ assert(double_multiplier < 1.0);
+ assert(double_multiplier > 0.0);
+ int shift;
+ quantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
+ assert(shift <= 0);
+ *left_shift = shift;
+}
+
+Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_shape)
+{
+ const int num_input1_dims = input1_shape.num_dims();
+ const int num_input2_dims = input2_shape.num_dims();
+ const int num_out_dims = std::max(num_input1_dims, num_input2_dims);
+ Shape output_shape(num_out_dims);
+
+ for (int i = 0; i < num_out_dims; ++i)
+ {
+ const int32_t input1_dim = i < num_input1_dims ? input1_shape.dim(num_input1_dims - i - 1) : 1;
+ const int32_t input2_dim = i < num_input2_dims ? input2_shape.dim(num_input2_dims - i - 1) : 1;
+ assert(input1_dim == input2_dim || input1_dim == 1 || input2_dim == 1);
+ output_shape.dim(num_out_dims - i - 1) = std::max(input1_dim, input2_dim);
+ }
+
+ return output_shape;
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/Utils.h b/compiler/luci-interpreter/src/kernels/Utils.h
new file mode 100644
index 000000000..3c2cc8450
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/Utils.h
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_UTILS_H
+#define LUCI_INTERPRETER_KERNELS_UTILS_H
+
+#include "core/KernelParams.h"
+#include "luci_interpreter/core/Tensor.h"
+
+#include <tensorflow/lite/kernels/internal/types.h>
+
+#include <cassert>
+#include <cstdint>
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+inline int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size,
+ int32_t filter_size, int32_t out_size)
+{
+ const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+ const int32_t padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2;
+ return padding > 0 ? padding : 0;
+}
+
+inline int32_t computePaddingWithOffset(int32_t stride, int32_t dilation_rate, int32_t in_size,
+ int32_t filter_size, int32_t out_size, int32_t *offset)
+{
+ int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+ int32_t total_padding = ((out_size - 1) * stride + effective_filter_size - in_size);
+ total_padding = total_padding > 0 ? total_padding : 0;
+ *offset = total_padding % 2;
+ return total_padding / 2;
+}
+
+inline int32_t computeOutputSize(Padding padding, int32_t image_size, int32_t filter_size,
+ int32_t stride, int32_t dilation_rate = 1)
+{
+ const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+ switch (padding)
+ {
+ case Padding::SAME:
+ return (image_size + stride - 1) / stride;
+ case Padding::VALID:
+ return (image_size + stride - effective_filter_size) / stride;
+ default:
+ assert(false);
+ return 0;
+ }
+}
+
+void calculateActivationRange(Activation activation, float *activation_min, float *activation_max);
+
+void calculateActivationRangeQuantized(Activation activation, const Tensor *output,
+ int32_t *activation_min, int32_t *activation_max);
+
+// Decompose a double multiplier into a Q0.31 int32 representation of its
+// significand, and shift representation of its exponent.
+//
+// Handles an arbitrary positive multiplier. The 'shift' output-value is
+// basically the 'floating-point exponent' of the multiplier:
+// Negative for a right-shift (when the multiplier is <1), positive for a
+// left-shift (when the multiplier is >1)
+void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift);
+
+// Decompose a double multiplier into a Q0.31 int32 representation of its
+// significand, and shift representation of the negative of its exponent ---
+// this is intended as a RIGHT-shift.
+//
+// Restricted to the case where the multiplier < 1 (and non-negative).
+void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
+ int *left_shift);
+
+Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_shape);
+
+inline tflite::RuntimeShape getTensorShape(const Tensor *tensor)
+{
+ if (tensor == nullptr)
+ return tflite::RuntimeShape();
+
+ const Shape &shape = tensor->shape();
+ tflite::RuntimeShape runtime_shape(shape.num_dims());
+ for (int i = 0; i < shape.num_dims(); ++i)
+ {
+ runtime_shape.SetDim(i, shape.dim(i));
+ }
+ return runtime_shape;
+}
+
+template <typename T> const T *getTensorData(const Tensor *tensor)
+{
+ return tensor != nullptr ? tensor->data<T>() : nullptr;
+}
+
+template <typename T> T *getTensorData(Tensor *tensor)
+{
+ return tensor != nullptr ? tensor->data<T>() : nullptr;
+}
+
+// A list of tensors in a format that can be used by kernels like split and
+// concatenation.
+template <typename T, bool is_const> class VectorOfTensors
+{
+public:
+ using ElementT = typename std::conditional<is_const, const T, T>::type;
+ using TensorT = typename std::conditional<is_const, const Tensor, Tensor>::type;
+
+ // Build with the tensors in 'tensor_list'.
+ explicit VectorOfTensors(const std::vector<TensorT *> &tensor_list)
+ {
+ const int num_tensors = tensor_list.size();
+
+ all_data_.reserve(num_tensors);
+ all_shape_.reserve(num_tensors);
+ all_shape_ptr_.reserve(num_tensors);
+
+ for (TensorT *tensor : tensor_list)
+ {
+ all_data_.push_back(getTensorData<T>(tensor));
+ all_shape_.push_back(getTensorShape(tensor));
+ }
+
+ // Taking the pointer from inside a std::vector is only OK if the vector is
+ // never modified, so we populate all_shape in the previous loop and then we
+ // are free to grab iterators here.
+ for (tflite::RuntimeShape &shape : all_shape_)
+ {
+ all_shape_ptr_.push_back(&shape);
+ }
+ }
+ // Return a pointer to the data pointers of all tensors in the list. For
+ // example:
+ // float* const* f = v.data();
+ // f[0][1] is the second element of the first tensor.
+ ElementT *const *data() const { return all_data_.data(); }
+
+  // Return a pointer to the shape pointers of all tensors in the list. For
+ // example:
+  //   const RuntimeShape* const* d = v.shapes();
+  //   d[1] are the dimensions of the second tensor in the list.
+ const tflite::RuntimeShape *const *shapes() const { return all_shape_ptr_.data(); }
+
+private:
+ std::vector<ElementT *> all_data_;
+ std::vector<tflite::RuntimeShape> all_shape_;
+ std::vector<tflite::RuntimeShape *> all_shape_ptr_;
+};
+
+// A list of quantized tensors in a format that can be used by kernels like
+// split and concatenation.
+template <bool is_const> class VectorOfQuantizedTensors : public VectorOfTensors<uint8_t, is_const>
+{
+public:
+ using typename VectorOfTensors<uint8_t, is_const>::TensorT;
+
+ // Build with the tensors in 'tensor_list'.
+ explicit VectorOfQuantizedTensors(const std::vector<TensorT *> &tensor_list)
+ : VectorOfTensors<uint8_t, is_const>(tensor_list)
+ {
+ for (TensorT *tensor : tensor_list)
+ {
+ zero_point_.push_back(tensor->zero_point());
+ scale_.push_back(tensor->scale());
+ }
+ }
+
+ const float *scale() const { return scale_.data(); }
+ const int32_t *zero_point() const { return zero_point_.data(); }
+
+private:
+ std::vector<int32_t> zero_point_;
+ std::vector<float> scale_;
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_UTILS_H
diff --git a/compiler/luci-interpreter/src/loader/CMakeLists.txt b/compiler/luci-interpreter/src/loader/CMakeLists.txt
new file mode 100644
index 000000000..fb36c4ab0
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/CMakeLists.txt
@@ -0,0 +1,15 @@
+set(SOURCES
+ GraphLoader.h
+ GraphLoader.cpp
+ KernelBuilder.h
+ KernelBuilder.cpp
+ ModuleLoader.h
+ ModuleLoader.cpp
+ RuntimeToIR.h)
+
+add_library(luci_interpreter_loader STATIC ${SOURCES})
+set_target_properties(luci_interpreter_loader PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_include_directories(luci_interpreter_loader PUBLIC "${LUCI_INTERPRETER_SOURCE_DIR}")
+target_link_libraries(luci_interpreter_loader
+ PUBLIC luci_lang luci_interpreter_core
+ PRIVATE luci_interpreter_kernels nncc_common)
diff --git a/compiler/luci-interpreter/src/loader/GraphLoader.cpp b/compiler/luci-interpreter/src/loader/GraphLoader.cpp
new file mode 100644
index 000000000..779fa0647
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/GraphLoader.cpp
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loader/GraphLoader.h"
+
+#include "loader/ModuleLoader.h"
+#include "loader/KernelBuilder.h"
+
+#include <loco/IR/Algorithm.h>
+
+namespace luci_interpreter
+{
+namespace
+{
+
+template <typename NodeT> Shape getNodeShape(const NodeT *node)
+{
+ Shape shape(node->rank());
+ for (uint32_t i = 0; i < node->rank(); ++i)
+ {
+ shape.dim(i) = node->dim(i).value();
+ }
+ return shape;
+}
+
+template <DataType DT> const void *getNodeDataImpl(const luci::CircleConst *node, size_t *data_size)
+{
+ const size_t element_size = getDataTypeSize(DT);
+ const int32_t num_elements = node->size<DT>();
+
+ *data_size = num_elements * element_size;
+ if (*data_size > 0)
+ {
+ // FIXME There is no good way to get the pointer to the data currently.
+ return &node->at<DT>(0);
+ }
+ return nullptr;
+}
+
+const void *getNodeData(const luci::CircleConst *node, size_t *data_size)
+{
+ switch (node->dtype())
+ {
+ case DataType::U8:
+ return getNodeDataImpl<DataType::U8>(node, data_size);
+ case DataType::FLOAT32:
+ return getNodeDataImpl<DataType::FLOAT32>(node, data_size);
+ case DataType::S32:
+ return getNodeDataImpl<DataType::S32>(node, data_size);
+ default:
+ throw std::runtime_error("Unsupported type.");
+ }
+}
+
+bool isExecutableNode(const luci::CircleNode *node)
+{
+ switch (node->opcode())
+ {
+ // These nodes denote inputs / outputs of a graph.
+ case luci::CircleOpcode::CONST:
+ case luci::CircleOpcode::CIRCLEINPUT:
+ case luci::CircleOpcode::CIRCLEOUTPUT:
+ // The following nodes denote outputs of multiple-output nodes.
+ case luci::CircleOpcode::CIRCLEIFOUT:
+ case luci::CircleOpcode::CIRCLESPLITOUT:
+ case luci::CircleOpcode::CIRCLEUNPACKOUT:
+ return false;
+ default:
+ return true;
+ }
+}
+
+bool isTensorProducingNode(const luci::CircleNode *node)
+{
+ switch (node->opcode())
+ {
+ // Output nodes do not produce tensors.
+ case luci::CircleOpcode::CIRCLEOUTPUT:
+ // The following nodes are multiple-output nodes. They do not produce tensors, the tensors
+ // are produced by the corresponding *Out nodes instead.
+ case luci::CircleOpcode::IF:
+ case luci::CircleOpcode::SPLIT:
+ case luci::CircleOpcode::UNPACK:
+ return false;
+ default:
+ return true;
+ }
+}
+
+} // namespace
+
+GraphLoader::GraphLoader(const ModuleLoader &module_loader, const loco::Graph *graph,
+ RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
+ : _module_loader(module_loader), _graph(graph), _runtime_graph(runtime_graph),
+ _runtime_to_ir(runtime_to_ir), _node_to_tensor(node_to_tensor)
+{
+}
+
+void GraphLoader::loadTensors()
+{
+ for (uint32_t i = 0; i < _graph->nodes()->size(); ++i)
+ {
+ const auto *node = loco::must_cast<const luci::CircleNode *>(_graph->nodes()->at(i));
+
+ if (!isTensorProducingNode(node))
+ continue;
+
+ // Only Input and Const nodes have shapes. Shapes of intermediate tensors will be inferred.
+ Shape shape{};
+ if (const auto *input_node = dynamic_cast<const luci::CircleInput *>(node))
+ {
+ shape = getNodeShape(input_node);
+ }
+ else if (const auto *const_node = dynamic_cast<const luci::CircleConst *>(node))
+ {
+ shape = getNodeShape(const_node);
+ }
+
+ AffineQuantization quantization;
+ if (node->quantparam() != nullptr)
+ {
+ const luci::CircleQuantParam *params = node->quantparam();
+ quantization.scale.assign(params->scale.cbegin(), params->scale.cend());
+ quantization.zero_point.assign(params->zerop.cbegin(), params->zerop.cend());
+ }
+
+ auto tensor = std::make_unique<Tensor>(node->dtype(), std::move(shape), std::move(quantization),
+ node->name());
+
+ if (const auto *const_node = dynamic_cast<const luci::CircleConst *>(node))
+ {
+ size_t data_size{};
+ const void *const_data = getNodeData(const_node, &data_size);
+ if (const_data != nullptr)
+ tensor->writeData(const_data, data_size);
+ }
+
+ _node_to_tensor.emplace(node, tensor.get());
+ _runtime_to_ir.tensor_to_node.emplace(tensor.get(), node);
+
+ _runtime_graph->addTensor(std::move(tensor));
+ }
+}
+
+void GraphLoader::initInputOutputTensors() const
+{
+ auto input_nodes = loco::input_nodes(_graph);
+ std::vector<Tensor *> input_tensors(input_nodes.size());
+ for (size_t i = 0; i < input_nodes.size(); ++i)
+ {
+ input_tensors[i] = _node_to_tensor.at(input_nodes[i]);
+ }
+ _runtime_graph->setInputTensors(input_tensors);
+
+ auto output_nodes = loco::output_nodes(const_cast<loco::Graph *>(_graph));
+ std::vector<Tensor *> output_tensors(output_nodes.size());
+ for (size_t i = 0; i < output_nodes.size(); ++i)
+ {
+ const auto *node = loco::must_cast<const luci::CircleOutput *>(output_nodes[i]);
+ output_tensors[i] = _node_to_tensor.at(node->from());
+ }
+ _runtime_graph->setOutputTensors(output_tensors);
+}
+
+void GraphLoader::loadOperators()
+{
+ KernelBuilder kernel_builder(_module_loader, *this);
+
+ // Create kernels for executable nodes. This has to be done in execution order.
+ for (const loco::Node *loco_node :
+ loco::postorder_traversal(loco::output_nodes(const_cast<loco::Graph *>(_graph))))
+ {
+ const auto *node = loco::must_cast<const luci::CircleNode *>(loco_node);
+
+ if (isExecutableNode(node))
+ {
+ std::unique_ptr<Kernel> kernel = node->accept(&kernel_builder);
+ _runtime_to_ir.kernel_to_node.emplace(kernel.get(), node);
+ _runtime_graph->addKernel(std::move(kernel));
+ }
+ }
+}
+
+void GraphLoader::load()
+{
+ loadTensors();
+ initInputOutputTensors();
+ loadOperators();
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/GraphLoader.h b/compiler/luci-interpreter/src/loader/GraphLoader.h
new file mode 100644
index 000000000..e0adc0f6c
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/GraphLoader.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
+#define LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
+
+#include "core/RuntimeGraph.h"
+#include "loader/RuntimeToIR.h"
+
+#include <loco/IR/Graph.h>
+
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class ModuleLoader;
+
+class GraphLoader
+{
+public:
+ GraphLoader(const ModuleLoader &module_loader, const loco::Graph *graph,
+ RuntimeGraph *runtime_graph, RuntimeToIR &runtime_to_ir,
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor);
+
+ void load();
+
+ Tensor *getTensorForNode(const loco::Node *node) const { return _node_to_tensor.at(node); }
+
+private:
+ void loadOperators();
+ void initInputOutputTensors() const;
+ void loadTensors();
+
+ const ModuleLoader &_module_loader;
+ const loco::Graph *_graph;
+ RuntimeGraph *_runtime_graph;
+ RuntimeToIR &_runtime_to_ir;
+
+ std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_GRAPHLOADER_H
diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.cpp b/compiler/luci-interpreter/src/loader/KernelBuilder.cpp
new file mode 100644
index 000000000..56da961dd
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/KernelBuilder.cpp
@@ -0,0 +1,529 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loader/KernelBuilder.h"
+
+#include "kernels/Add.h"
+#include "kernels/ArgMax.h"
+#include "kernels/AveragePool2D.h"
+#include "kernels/Concatenation.h"
+#include "kernels/Conv2D.h"
+#include "kernels/DepthwiseConv2D.h"
+#include "kernels/Elu.h"
+#include "kernels/FullyConnected.h"
+#include "kernels/If.h"
+#include "kernels/L2Normalize.h"
+#include "kernels/L2Pool2D.h"
+#include "kernels/LeakyRelu.h"
+#include "kernels/LocalResponseNormalization.h"
+#include "kernels/Logistic.h"
+#include "kernels/MaxPool2D.h"
+#include "kernels/Mean.h"
+#include "kernels/Mul.h"
+#include "kernels/Pad.h"
+#include "kernels/Reshape.h"
+#include "kernels/Softmax.h"
+#include "kernels/SpaceToDepth.h"
+#include "kernels/Split.h"
+#include "kernels/StridedSlice.h"
+#include "kernels/Squeeze.h"
+#include "kernels/Unpack.h"
+#include "kernels/Transpose.h"
+#include "kernels/TransposeConv.h"
+#include "loader/GraphLoader.h"
+#include "loader/ModuleLoader.h"
+
+#include <stdexcept>
+
+namespace luci_interpreter
+{
+
+template <typename CircleNodeOut>
+static std::vector<const loco::Node *> collectOutputNodes(const luci::CircleNode *node)
+{
+ std::vector<const CircleNodeOut *> output_nodes;
+ for (const loco::Node *loco_node : loco::succs(node))
+ {
+ output_nodes.push_back(loco::must_cast<const CircleNodeOut *>(loco_node));
+ }
+ std::sort(output_nodes.begin(), output_nodes.end(),
+ [](const CircleNodeOut *node1, const CircleNodeOut *node2) {
+ return node1->index() < node2->index();
+ });
+ return {output_nodes.cbegin(), output_nodes.cend()};
+}
+
+const Tensor *KernelBuilder::getInputTensor(const loco::Node *node) const
+{
+ const Tensor *tensor = _graph_loader.getTensorForNode(node);
+ assert(tensor != nullptr);
+ return tensor;
+}
+
+const Tensor *KernelBuilder::getOptionalInputTensor(const loco::Node *node) const
+{
+ // TODO Revise this when optional inputs are implemented in the IR.
+ return getInputTensor(node);
+}
+
+Tensor *KernelBuilder::getOutputTensor(const loco::Node *node) const
+{
+ Tensor *tensor = _graph_loader.getTensorForNode(node);
+ assert(tensor != nullptr);
+ return tensor;
+}
+
+std::vector<Tensor *>
+KernelBuilder::getOutputTensors(const std::vector<const loco::Node *> &nodes) const
+{
+ std::vector<Tensor *> tensors;
+ tensors.reserve(nodes.size());
+ for (const loco::Node *node : nodes)
+ tensors.push_back(getOutputTensor(node));
+ return tensors;
+}
+
+RuntimeGraph *KernelBuilder::getRuntimeGraph(const loco::Graph *graph) const
+{
+ RuntimeGraph *runtime_graph = _module_loader.getRuntimeGraph(graph);
+ assert(runtime_graph != nullptr);
+ return runtime_graph;
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleAdd *node)
+{
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = getInputTensor(node->x());
+ const Tensor *input2 = getInputTensor(node->y());
+ Tensor *output = getOutputTensor(node);
+
+ AddParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Add>(input1, input2, output, params);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleArgMax *node)
+{
+ assert(node->arity() == 2);
+ const Tensor *input1 = getInputTensor(node->input());
+ const Tensor *input2 = getInputTensor(node->dimension());
+ Tensor *output = getOutputTensor(node);
+
+ ArgMaxParams params{};
+ params.output_type = node->output_type();
+
+ return std::make_unique<kernels::ArgMax>(input1, input2, output, params);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleAveragePool2D *node)
+{
+ assert(node->arity() == 1);
+
+ const Tensor *input = getInputTensor(node->value());
+ Tensor *output = getOutputTensor(node);
+
+ Pool2DParams params{};
+ params.padding = node->padding();
+ params.filter_height = node->filter()->h();
+ params.filter_width = node->filter()->w();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::AveragePool2D>(input, output, params);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleConcatenation *node)
+{
+ std::vector<const Tensor *> inputs(node->numValues());
+ for (uint32_t i = 0; i < node->numValues(); ++i)
+ {
+ inputs[i] = getInputTensor(node->values(i));
+ }
+ Tensor *output = getOutputTensor(node);
+
+ ConcatenationParams params{};
+ params.axis = node->axis();
+
+ return std::make_unique<kernels::Concatenation>(std::move(inputs), output, params);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleConst *)
+{
+ throw std::runtime_error("Const node cannot be executed.");
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleConv2D *node)
+{
+ assert(node->arity() == 3);
+
+ const Tensor *input = getInputTensor(node->input());
+ const Tensor *filter = getInputTensor(node->filter());
+ const Tensor *bias = getInputTensor(node->bias());
+ Tensor *output = getOutputTensor(node);
+
+ Conv2DParams params{};
+ params.padding = node->padding();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.dilation_height_factor = node->dilation()->h();
+ params.dilation_width_factor = node->dilation()->w();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Conv2D>(input, filter, bias, output, params);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleDepthwiseConv2D *node)
+{
+ assert(node->arity() == 3);
+
+ const Tensor *input = getInputTensor(node->input());
+ const Tensor *filter = getInputTensor(node->filter());
+ const Tensor *bias = getInputTensor(node->bias());
+ Tensor *output = getOutputTensor(node);
+
+ DepthwiseConv2DParams params{};
+ params.padding = node->padding();
+ params.depth_multiplier = node->depthMultiplier();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.dilation_height_factor = node->dilation()->h();
+ params.dilation_width_factor = node->dilation()->w();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::DepthwiseConv2D>(input, filter, bias, output, params);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleElu *node)
+{
+ assert(node->arity() == 1);
+
+ const Tensor *input = getInputTensor(node->features());
+ Tensor *output = getOutputTensor(node);
+
+ return std::make_unique<kernels::Elu>(input, output);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleFullyConnected *node)
+{
+ assert(node->arity() == 3);
+
+ const Tensor *input = getInputTensor(node->input());
+ const Tensor *filter = getInputTensor(node->weights());
+ const Tensor *bias = getOptionalInputTensor(node->bias());
+ Tensor *output = getOutputTensor(node);
+
+ FullyConnectedParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::FullyConnected>(input, filter, bias, output, params);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleIf *node)
+{
+ auto output_nodes = collectOutputNodes<luci::CircleIfOut>(node);
+ assert(node->arity() == 1 + node->input_count());
+ assert(output_nodes.size() == static_cast<size_t>(node->output_count()));
+
+ const Tensor *cond = getInputTensor(node->cond());
+ std::vector<const Tensor *> inputs(node->input_count());
+ for (uint32_t i = 0; i < node->input_count(); ++i)
+ {
+ inputs[i] = getInputTensor(node->input(i));
+ }
+ std::vector<Tensor *> outputs = getOutputTensors(output_nodes);
+
+ RuntimeGraph *then_graph = getRuntimeGraph(node->then_graph());
+ RuntimeGraph *else_graph = getRuntimeGraph(node->else_graph());
+
+ return std::make_unique<kernels::If>(cond, std::move(inputs), std::move(outputs), then_graph,
+ else_graph);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleL2Normalize *node)
+{
+ assert(node->arity() == 1);
+
+ const Tensor *input = getInputTensor(node->x());
+ Tensor *output = getOutputTensor(node);
+
+ L2NormParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::L2Normalize>(input, output, params);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleL2Pool2D *node)
+{
+ assert(node->arity() == 1);
+
+ const Tensor *input = getInputTensor(node->value());
+ Tensor *output = getOutputTensor(node);
+
+ Pool2DParams params{};
+ params.padding = node->padding();
+ params.filter_height = node->filter()->h();
+ params.filter_width = node->filter()->w();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::L2Pool2D>(input, output, params);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleLeakyRelu *node)
+{
+ assert(node->arity() == 1);
+ const Tensor *input = getInputTensor(node->features());
+ Tensor *output = getOutputTensor(node);
+
+ LeakyReluParams params{};
+ params.alpha = node->alpha();
+
+ return std::make_unique<kernels::LeakyRelu>(input, output, params);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleLocalResponseNormalization *node)
+{
+ assert(node->arity() == 1);
+ const Tensor *input = getInputTensor(node->input());
+ Tensor *output = getOutputTensor(node);
+
+ LocalResponseNormalizationParams params{};
+ params.radius = node->radius();
+ params.bias = node->bias();
+ params.alpha = node->alpha();
+ params.beta = node->beta();
+
+ return std::make_unique<kernels::LocalResponseNormalization>(input, output, params);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleLogistic *node)
+{
+ assert(node->arity() == 1);
+
+ const Tensor *input = getInputTensor(node->x());
+ Tensor *output = getOutputTensor(node);
+
+ return std::make_unique<kernels::Logistic>(input, output);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleInput *)
+{
+ throw std::runtime_error("Input node cannot be executed.");
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleMaxPool2D *node)
+{
+ assert(node->arity() == 1);
+
+ const Tensor *input = getInputTensor(node->value());
+ Tensor *output = getOutputTensor(node);
+
+ Pool2DParams params{};
+ params.padding = node->padding();
+ params.filter_height = node->filter()->h();
+ params.filter_width = node->filter()->w();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::MaxPool2D>(input, output, params);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleMean *node)
+{
+ assert(node->arity() == 2);
+
+ const Tensor *input = getInputTensor(node->input());
+ const Tensor *axes = getInputTensor(node->reduction_indices());
+ Tensor *output = getOutputTensor(node);
+
+ ReducerParams params{};
+ params.keep_dims = node->keep_dims();
+
+ return std::make_unique<kernels::Mean>(input, axes, output, params);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleMul *node)
+{
+ assert(node->arity() == 2);
+
+ const Tensor *input1 = getInputTensor(node->x());
+ const Tensor *input2 = getInputTensor(node->y());
+ Tensor *output = getOutputTensor(node);
+
+ MulParams params{};
+ params.activation = node->fusedActivationFunction();
+
+ return std::make_unique<kernels::Mul>(input1, input2, output, params);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleOutput *)
+{
+ throw std::runtime_error("Output node cannot be executed.");
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CirclePad *node)
+{
+ assert(node->arity() == 2);
+
+ const Tensor *input = getInputTensor(node->input());
+ const Tensor *paddings = getInputTensor(node->paddings());
+ Tensor *output = getOutputTensor(node);
+
+ return std::make_unique<kernels::Pad>(input, paddings, output);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleReshape *node)
+{
+ assert(node->arity() == 2);
+
+ const Tensor *input = getInputTensor(node->tensor());
+ const Tensor *shape = getInputTensor(node->shape());
+ Tensor *output = getOutputTensor(node);
+
+ // NOTE 'newShape' attribute is ignored.
+ return std::make_unique<kernels::Reshape>(input, shape, output);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSoftmax *node)
+{
+ assert(node->arity() == 1);
+
+ const Tensor *input = getInputTensor(node->logits());
+ Tensor *output = getOutputTensor(node);
+
+ SoftmaxParams params{};
+ params.beta = node->beta();
+
+ return std::make_unique<kernels::Softmax>(input, output, params);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSpaceToDepth *node)
+{
+ assert(node->arity() == 1);
+ const Tensor *input = getInputTensor(node->input());
+
+ Tensor *output = getOutputTensor(node);
+
+ SpaceToDepthParams params{};
+ params.block_size = node->block_size();
+
+ return std::make_unique<kernels::SpaceToDepth>(input, output, params);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSplit *node)
+{
+ auto output_nodes = collectOutputNodes<luci::CircleSplitOut>(node);
+ assert(node->arity() == 2);
+ assert(output_nodes.size() == static_cast<size_t>(node->num_split()));
+
+ const Tensor *axis = getInputTensor(node->split_dim());
+ const Tensor *input = getInputTensor(node->input());
+ std::vector<Tensor *> outputs = getOutputTensors(output_nodes);
+
+ // NOTE 'num_splits' attribute is ignored.
+ return std::make_unique<kernels::Split>(axis, input, std::move(outputs));
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleStridedSlice *node)
+{
+ assert(node->arity() == 4);
+
+ const Tensor *input = getInputTensor(node->input());
+ const Tensor *begin = getInputTensor(node->begin());
+ const Tensor *end = getInputTensor(node->end());
+ const Tensor *strides = getInputTensor(node->strides());
+
+ Tensor *output = getOutputTensor(node);
+
+ StridedSliceParams params{};
+ params.begin_mask = node->begin_mask();
+ params.ellipsis_mask = node->ellipsis_mask();
+ params.end_mask = node->end_mask();
+ params.new_axis_mask = node->new_axis_mask();
+ params.shrink_axis_mask = node->shrink_axis_mask();
+
+ return std::make_unique<kernels::StridedSlice>(input, begin, end, strides, output, params);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleSqueeze *node)
+{
+ assert(node->arity() == 1);
+
+ const Tensor *input = getInputTensor(node->input());
+ Tensor *output = getOutputTensor(node);
+
+ SqueezeParams params{};
+ assert(node->squeeze_dims().size() <= 4);
+ for (size_t i = 0; i < node->squeeze_dims().size(); i++)
+ {
+ params.squeeze_dims.push_back(node->squeeze_dims().at(i));
+ }
+
+ return std::make_unique<kernels::Squeeze>(input, output, params);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTransposeConv *node)
+{
+ assert(node->arity() == 3);
+
+ const Tensor *input_sizes = getInputTensor(node->inputSizes());
+ const Tensor *filter = getInputTensor(node->filter());
+ const Tensor *out_backprop = getInputTensor(node->outBackprop());
+
+ Tensor *output = getOutputTensor(node);
+
+ TransposeConvParams params{};
+ params.padding = node->padding();
+ params.stride_height = node->stride()->h();
+ params.stride_width = node->stride()->w();
+
+ return std::make_unique<kernels::TransposeConv>(input_sizes, filter, out_backprop, output,
+ params);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleUnpack *node)
+{
+ auto output_nodes = collectOutputNodes<luci::CircleUnpackOut>(node);
+ assert(node->arity() == 1);
+ assert(output_nodes.size() == static_cast<size_t>(node->num()));
+
+ const Tensor *input = getInputTensor(node->value());
+ std::vector<Tensor *> outputs = getOutputTensors(output_nodes);
+
+ UnpackParams params{};
+ params.axis = node->axis();
+
+ // NOTE 'num' attribute is ignored.
+ return std::make_unique<kernels::Unpack>(input, std::move(outputs), params);
+}
+
+std::unique_ptr<Kernel> KernelBuilder::visit(const luci::CircleTranspose *node)
+{
+ assert(node->arity() == 2);
+
+ const Tensor *input = getInputTensor(node->a());
+ const Tensor *perm = getInputTensor(node->perm());
+ Tensor *output = getOutputTensor(node);
+
+ return std::make_unique<kernels::Transpose>(input, perm, output);
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/KernelBuilder.h b/compiler/luci-interpreter/src/loader/KernelBuilder.h
new file mode 100644
index 000000000..7e30d395b
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/KernelBuilder.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_KERNELBUILDER_H
+#define LUCI_INTERPRETER_LOADER_KERNELBUILDER_H
+
+#include "core/Kernel.h"
+#include "core/RuntimeGraph.h"
+
+#include <luci/IR/CircleNodeVisitor.h>
+
+#include <memory>
+#include <vector>
+
+namespace luci_interpreter
+{
+
+class GraphLoader;
+class ModuleLoader;
+
+class KernelBuilder : public luci::CircleNodeVisitor<std::unique_ptr<Kernel>>
+{
+public:
+ KernelBuilder(const ModuleLoader &module_loader, const GraphLoader &graph_loader)
+ : _module_loader(module_loader), _graph_loader(graph_loader)
+ {
+ }
+
+ std::unique_ptr<Kernel> visit(const luci::CircleAdd *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleArgMax *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleAveragePool2D *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleConcatenation *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleConv2D *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleConst *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleDepthwiseConv2D *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleElu *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleFullyConnected *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleIf *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleL2Normalize *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleL2Pool2D *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleLeakyRelu *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleLocalResponseNormalization *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleLogistic *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleInput *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleMaxPool2D *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleMean *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleMul *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleOutput *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CirclePad *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleReshape *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleSoftmax *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleSpaceToDepth *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleSplit *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleStridedSlice *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleSqueeze *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleTranspose *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleTransposeConv *node) override;
+ std::unique_ptr<Kernel> visit(const luci::CircleUnpack *node) override;
+
+private:
+ const Tensor *getInputTensor(const loco::Node *node) const;
+
+ const Tensor *getOptionalInputTensor(const loco::Node *node) const;
+
+ Tensor *getOutputTensor(const loco::Node *node) const;
+
+ std::vector<Tensor *> getOutputTensors(const std::vector<const loco::Node *> &nodes) const;
+
+ RuntimeGraph *getRuntimeGraph(const loco::Graph *graph) const;
+
+private:
+ const ModuleLoader &_module_loader;
+ const GraphLoader &_graph_loader;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_KERNELBUILDER_H
diff --git a/compiler/luci-interpreter/src/loader/ModuleLoader.cpp b/compiler/luci-interpreter/src/loader/ModuleLoader.cpp
new file mode 100644
index 000000000..7780a61b6
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/ModuleLoader.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ModuleLoader.h"
+
+#include "GraphLoader.h"
+
+namespace luci_interpreter
+{
+
+ModuleLoader::ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module,
+ RuntimeToIR &runtime_to_ir,
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor)
+ : _module(module), _runtime_module(runtime_module), _runtime_to_ir(runtime_to_ir),
+ _node_to_tensor(node_to_tensor)
+{
+}
+
+void ModuleLoader::load()
+{
+ // Runtime graphs have to be created in advance, because they will be needed during the loading
+ // process for control flow nodes.
+ for (size_t i = 0; i < _module->size(); ++i)
+ {
+ _graph_to_runtime_graph.emplace(_module->graph(i), _runtime_module->addGraph());
+ }
+ for (size_t i = 0; i < _module->size(); ++i)
+ {
+ const loco::Graph *graph = _module->graph(i);
+ RuntimeGraph *runtime_graph = _graph_to_runtime_graph.at(graph);
+ GraphLoader loader(*this, graph, runtime_graph, _runtime_to_ir, _node_to_tensor);
+ loader.load();
+ }
+}
+
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/loader/ModuleLoader.h b/compiler/luci-interpreter/src/loader/ModuleLoader.h
new file mode 100644
index 000000000..954dbfb61
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/ModuleLoader.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_MODULELOADER_H
+#define LUCI_INTERPRETER_LOADER_MODULELOADER_H
+
+#include "core/RuntimeModule.h"
+#include "loader/RuntimeToIR.h"
+
+#include <luci/IR/Module.h>
+
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+class ModuleLoader
+{
+public:
+ ModuleLoader(const luci::Module *module, RuntimeModule *runtime_module,
+ RuntimeToIR &runtime_to_ir,
+ std::unordered_map<const loco::Node *, Tensor *> &node_to_tensor);
+
+ void load();
+
+ RuntimeGraph *getRuntimeGraph(const loco::Graph *graph) const
+ {
+ return _graph_to_runtime_graph.at(graph);
+ }
+
+private:
+ const luci::Module *_module;
+ RuntimeModule *_runtime_module;
+ RuntimeToIR &_runtime_to_ir;
+ std::unordered_map<const loco::Node *, Tensor *> &_node_to_tensor;
+ std::unordered_map<const loco::Graph *, RuntimeGraph *> _graph_to_runtime_graph;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_MODULELOADER_H
diff --git a/compiler/luci-interpreter/src/loader/RuntimeToIR.h b/compiler/luci-interpreter/src/loader/RuntimeToIR.h
new file mode 100644
index 000000000..9ea8b1fa2
--- /dev/null
+++ b/compiler/luci-interpreter/src/loader/RuntimeToIR.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_LOADER_RUNTIMETOIR_H
+#define LUCI_INTERPRETER_LOADER_RUNTIMETOIR_H
+
+#include "luci_interpreter/core/Tensor.h"
+
+#include <luci/IR/CircleNode.h>
+
+#include <unordered_map>
+
+namespace luci_interpreter
+{
+
+// Maps runtime entities back to IR entities. It is used to implement observing functionality.
+struct RuntimeToIR
+{
+ std::unordered_map<const Tensor *, const luci::CircleNode *> tensor_to_node;
+ std::unordered_map<const Kernel *, const luci::CircleNode *> kernel_to_node;
+};
+
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_LOADER_RUNTIMETOIR_H