| author | Chunseok Lee <chunseok.lee@samsung.com> | 2020-10-28 12:16:55 +0900 |
|---|---|---|
| committer | Chunseok Lee <chunseok.lee@samsung.com> | 2020-10-28 12:16:55 +0900 |
| commit | c55f8a6db48cda9d3a78048338b7f18c4cca62b8 (patch) | |
| tree | 761ee8e171e5203f5c598ad93b2e7e0bc2e31aa2 /compiler/luci-interpreter/src/kernels | |
| parent | 74476a2d0296bdad70a2f7f90bc7419a8b05bffd (diff) | |
| download | nnfw-c55f8a6db48cda9d3a78048338b7f18c4cca62b8.tar.gz, nnfw-c55f8a6db48cda9d3a78048338b7f18c4cca62b8.tar.bz2, nnfw-c55f8a6db48cda9d3a78048338b7f18c4cca62b8.zip | |

Imported Upstream version 1.10.0

Tags: upstream/1.10.0, submit/tizen/20201028.104702, submit/tizen/20201028.031836, accepted/tizen/unified/20201029.124827
Diffstat (limited to 'compiler/luci-interpreter/src/kernels')
117 files changed, 7800 insertions, 600 deletions
diff --git a/compiler/luci-interpreter/src/kernels/Add.cpp b/compiler/luci-interpreter/src/kernels/Add.cpp index 9ed155e94..8d119d516 100644 --- a/compiler/luci-interpreter/src/kernels/Add.cpp +++ b/compiler/luci-interpreter/src/kernels/Add.cpp @@ -17,6 +17,7 @@ #include "kernels/Add.h" +#include "kernels/BinaryOpCommon.h" #include "kernels/Utils.h" #include <tensorflow/lite/kernels/internal/reference/add.h> @@ -36,10 +37,13 @@ Add::Add(const Tensor *input1, const Tensor *input2, Tensor *output, const AddPa void Add::configure() { - if (input1()->element_type() != input2()->element_type()) + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()); + if (input1()->element_type() == DataType::S16) { - throw std::runtime_error("Input Tensor Data Type Mismatch."); + LUCI_INTERPRETER_CHECK(input1()->zero_point() == 0 && input2()->zero_point() == 0 && + output()->zero_point() == 0); } + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); } @@ -53,6 +57,9 @@ void Add::execute() const case DataType::U8: evalQuantized(); break; + case DataType::S16: + evalQuantizedS16(); + break; default: throw std::runtime_error("Unsupported type."); } @@ -140,5 +147,49 @@ void Add::evalQuantized() const } } +void Add::evalQuantizedS16() const +{ + const auto input1_scale = static_cast<double>(input1()->scale()); + const auto input2_scale = static_cast<double>(input2()->scale()); + const auto output_scale = static_cast<double>(output()->scale()); + + constexpr int left_shift = 12; + const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale); + const double real_input1_multiplier = input1_scale / twice_max_input_scale; + const double real_input2_multiplier = input2_scale / twice_max_input_scale; + const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale); + + int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{}; + int input1_shift{}, input2_shift{}, output_shift{}; + quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift); + quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift); + quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift); + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + auto fn = [input1_multiplier, input1_shift, // + input2_multiplier, input2_shift, // + output_multiplier, output_shift, // + activation_min, activation_max](int16_t input1_val, int16_t input2_val) { + const int32_t shifted_input1_val = static_cast<int32_t>(input1_val) << left_shift; + const int32_t shifted_input2_val = static_cast<int32_t>(input2_val) << left_shift; + const int32_t scaled_input1_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input1_val, input1_multiplier, input1_shift); + const int32_t scaled_input2_val = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp( + shifted_input2_val, input2_multiplier, input2_shift); + const int32_t raw_sum = scaled_input1_val + scaled_input2_val; + const int32_t raw_output = tflite::MultiplyByQuantizedMultiplierSmallerThanOneExp( + raw_sum, output_multiplier, output_shift); + const int32_t clamped_output = std::min(activation_max, std::max(activation_min, raw_output)); + return static_cast<int16_t>(clamped_output); + }; + + BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<int16_t>(input1()), + 
getTensorShape(input2()), getTensorData<int16_t>(input2()), + getTensorShape(output()), getTensorData<int16_t>(output()), fn); +} + } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Add.h b/compiler/luci-interpreter/src/kernels/Add.h index a1f7e0406..79518845d 100644 --- a/compiler/luci-interpreter/src/kernels/Add.h +++ b/compiler/luci-interpreter/src/kernels/Add.h @@ -40,6 +40,7 @@ public: private: void evalFloat() const; void evalQuantized() const; + void evalQuantizedS16() const; }; } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/Add.test.cpp b/compiler/luci-interpreter/src/kernels/Add.test.cpp index 705b648c8..de8a3bbb0 100644 --- a/compiler/luci-interpreter/src/kernels/Add.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Add.test.cpp @@ -57,18 +57,10 @@ TEST(AddTest, Uint8) std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-3.f, 3.f); for (int i = 0; i < output_data.size(); i++) { - Tensor input1_tensor{ - getElementType<uint8_t>(), base_shape, {{quant_param.first}, {quant_param.second}}, ""}; - Tensor input2_tensor{ - getElementType<uint8_t>(), test_shapes[i], {{quant_param.first}, {quant_param.second}}, ""}; - std::vector<uint8_t> quantized_input1_value = - quantize<uint8_t>(base_data, quant_param.first, quant_param.second); - std::vector<uint8_t> quantized_input2_value = - quantize<uint8_t>(test_data, quant_param.first, quant_param.second); - input1_tensor.writeData(quantized_input1_value.data(), - quantized_input1_value.size() * sizeof(uint8_t)); - input2_tensor.writeData(quantized_input2_value.data(), - quantized_input2_value.size() * sizeof(uint8_t)); + Tensor input1_tensor = + makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data); + Tensor input2_tensor = makeInputTensor<DataType::U8>(test_shapes[i], quant_param.first, + quant_param.second, test_data); Tensor output_tensor = makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second); @@ -79,26 +71,17 @@ TEST(AddTest, Uint8) kernel.configure(); kernel.execute(); - EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor), - output_tensor.scale(), output_tensor.zero_point()), - ElementsAreArray(ArrayFloatNear(output_data[i], kQuantizedTolerance))); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data[i], kQuantizedTolerance)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i])); } // Re-run with exchanged inputs. 
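
The tests above now build quantized inputs through makeInputTensor and compare results through dequantizeTensorData, replacing the hand-rolled quantize/writeData boilerplate removed by this commit. A minimal sketch of the affine (de)quantization those TestUtils helpers are assumed to perform follows; the function names and exact clamping here are illustrative, not taken from the commit.

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

// Assumed helper behaviour: q = round(x / scale) + zero_point, clamped to the uint8 range.
std::vector<uint8_t> quantizeU8(const std::vector<float> &values, float scale, int32_t zero_point)
{
  std::vector<uint8_t> result;
  result.reserve(values.size());
  for (float v : values)
  {
    const int32_t q = static_cast<int32_t>(std::round(v / scale)) + zero_point;
    result.push_back(static_cast<uint8_t>(std::min<int32_t>(255, std::max<int32_t>(0, q))));
  }
  return result;
}

// Assumed inverse used for comparisons: x = (q - zero_point) * scale.
std::vector<float> dequantizeU8(const std::vector<uint8_t> &values, float scale, int32_t zero_point)
{
  std::vector<float> result;
  result.reserve(values.size());
  for (uint8_t q : values)
    result.push_back((static_cast<int32_t>(q) - zero_point) * scale);
  return result;
}
```
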
for (int i = 0; i < output_data.size(); i++) { - Tensor input1_tensor{ - getElementType<uint8_t>(), test_shapes[i], {{quant_param.first}, {quant_param.second}}, ""}; - Tensor input2_tensor{ - getElementType<uint8_t>(), base_shape, {{quant_param.first}, {quant_param.second}}, ""}; - std::vector<uint8_t> quantized_input1_value = - quantize<uint8_t>(test_data, quant_param.first, quant_param.second); - std::vector<uint8_t> quantized_input2_value = - quantize<uint8_t>(base_data, quant_param.first, quant_param.second); - input1_tensor.writeData(quantized_input1_value.data(), - quantized_input1_value.size() * sizeof(uint8_t)); - input2_tensor.writeData(quantized_input2_value.data(), - quantized_input2_value.size() * sizeof(uint8_t)); + Tensor input1_tensor = makeInputTensor<DataType::U8>(test_shapes[i], quant_param.first, + quant_param.second, test_data); + Tensor input2_tensor = + makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data); Tensor output_tensor = makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second); @@ -109,9 +92,8 @@ TEST(AddTest, Uint8) kernel.configure(); kernel.execute(); - EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor), - output_tensor.scale(), output_tensor.zero_point()), - ElementsAreArray(ArrayFloatNear(output_data[i], kQuantizedTolerance))); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data[i], kQuantizedTolerance)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i])); } } @@ -145,8 +127,7 @@ TEST(AddTest, Float) kernel.configure(); kernel.execute(); - EXPECT_THAT(extractTensorData<float>(output_tensor), - ::testing::ElementsAreArray(ArrayFloatNear(test_outputs[i], 0.0001f))) + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f)) << "With shape number " << i; } // Re-run with exchanged inputs. 
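
FloatArrayNear replaces the longer ::testing::ElementsAreArray(ArrayFloatNear(...)) composition throughout these tests. A hypothetical sketch of such a convenience matcher, assuming it simply bundles gmock's FloatNear element matchers (the real definition lives in TestUtils.h of this commit):

```cpp
#include <gmock/gmock.h>
#include <vector>

// Build one FloatNear matcher per expected element and wrap them in ElementsAreArray,
// so a whole float vector can be compared with a single tolerance.
::testing::Matcher<std::vector<float>> FloatArrayNearSketch(const std::vector<float> &values,
                                                            float max_abs_error = 1.0e-5f)
{
  std::vector<::testing::Matcher<float>> element_matchers;
  element_matchers.reserve(values.size());
  for (float v : values)
    element_matchers.push_back(::testing::FloatNear(v, max_abs_error));
  return ::testing::ElementsAreArray(element_matchers);
}
```
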
@@ -163,8 +144,72 @@ TEST(AddTest, Float) kernel.configure(); kernel.execute(); - EXPECT_THAT(extractTensorData<float>(output_tensor), - ::testing::ElementsAreArray(ArrayFloatNear(test_outputs[i], 0.0001f))) + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f)) + << "With shape number " << i; + } +} + +TEST(AddTest, SInt16) +{ + Shape base_shape = {2, 3, 1, 2}; + std::vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + std::vector<std::vector<int32_t>> ref_output_shapes{ + {2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}}; + + std::vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, + 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f}; + std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; + std::vector<std::vector<float>> ref_outputs = { + {0.0f, 2.6f, 0.0f, 2.8f, 0.7f, 3.2f, 1.1f, 0.8f, 0.5f, 1.0f, 1.9f, 1.4f, + 1.0f, 0.0f, 0.4f, 0.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.8f, 3.3f, 2.2f, 3.7f, + 0.0f, 0.3f, 0.0f, 0.5f, 0.0f, 0.9f, 0.9f, 0.0f, 0.3f, 0.0f, 1.7f, 0.0f}, + {0.0f, 2.6f, 0.5f, 1.0f, 1.8f, 0.0f, 1.4f, 3.1f, 0.0f, 0.5f, 1.7f, 0.0f}, + {0.0f, 2.5f, 0.0f, 2.6f, 0.0f, 1.9f, 1.1f, 0.7f, 1.2f, 0.8f, 0.5f, 0.1f, + 1.0f, 0.0f, 1.1f, 0.0f, 0.4f, 0.0f, 1.7f, 3.3f, 2.2f, 3.8f, 2.1f, 3.7f, + 0.0f, 0.5f, 0.0f, 1.0f, 0.0f, 0.9f, 1.2f, 0.0f, 1.7f, 0.0f, 1.6f, 0.0f}, + {0.0f, 2.5f, 1.2f, 0.8f, 0.4f, 0.0f, 1.7f, 3.3f, 0.0f, 1.0f, 1.6f, 0.0f}}; + + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DataType::S16>(base_shape, 3.0 / 32767, 0, input1_data); + Tensor input2_tensor = + makeInputTensor<DataType::S16>(test_shapes[i], 1.0 / 32767, 0, input2_data); + Tensor output_tensor = makeOutputTensor(DataType::S16, 4.0 / 32767, 0); + const float tolerance = output_tensor.scale(); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), + ::testing::ElementsAreArray(ref_output_shapes[i])) + << "With shape number " << i; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance)) + << "With shape number " << i; + } + // Re-run with exchanged inputs and different scales. 
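
This test exercises the new Add::evalQuantizedS16 path shown earlier in this diff, which rescales both operands toward a common scale (using a 12-bit left shift and fixed-point multipliers from quantizeMultiplierSmallerThanOneExp) before adding and re-quantizing. The sketch below is a simplified float-domain model of that per-element computation, not the kernel itself; the function and parameter names are illustrative.

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>

// Float-domain model of the per-element S16 add: dequantize both operands, add,
// re-quantize to the output scale (zero points are 0 for S16 in this kernel), and
// clamp to the activation range. The real kernel approximates the same result with
// 32-bit fixed-point multipliers instead of doubles.
int16_t addS16Reference(int16_t q1, double s1, int16_t q2, double s2, double out_scale,
                        int32_t act_min, int32_t act_max)
{
  const double real_sum = q1 * s1 + q2 * s2;            // dequantized sum
  const double raw = std::round(real_sum / out_scale);  // re-quantize to output scale
  const int32_t clamped =
      std::min<int32_t>(act_max, std::max<int32_t>(act_min, static_cast<int32_t>(raw)));
  return static_cast<int16_t>(clamped);
}
```
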
+ for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = + makeInputTensor<DataType::S16>(test_shapes[i], 2.0 / 32767, 0, input2_data); + Tensor input2_tensor = makeInputTensor<DataType::S16>(base_shape, 4.0 / 32767, 0, input1_data); + Tensor output_tensor = makeOutputTensor(DataType::S16, 5.0 / 32767, 0); + const float tolerance = output_tensor.scale(); + + AddParams params{}; + params.activation = Activation::RELU; + + Add kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), + ::testing::ElementsAreArray(ref_output_shapes[i])) + << "With shape number " << i; + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_outputs[i], tolerance)) << "With shape number " << i; } } diff --git a/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp b/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp index 2ab7ff0da..c6734a114 100644 --- a/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp +++ b/compiler/luci-interpreter/src/kernels/ArgMax.test.cpp @@ -32,12 +32,9 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, std::initializer_list<T1> input_data, std::initializer_list<int32_t> dimension_data, std::initializer_list<T2> output_data) { - - Tensor input_tensor{getElementType<T1>(), input_shape, {}, ""}; - input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(T1)); - Tensor dimension_tensor{DataType::S32, dimension_shape, {}, ""}; - dimension_tensor.writeData(dimension_data.begin(), dimension_data.size() * sizeof(int32_t)); - + constexpr DataType element_type = getElementType<T1>(); + Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data); + Tensor dimension_tensor = makeInputTensor<DataType::S32>(dimension_shape, dimension_data); Tensor output_tensor = makeOutputTensor(getElementType<T2>()); ArgMaxParams params{}; diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp b/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp index cdd81d7d6..df54f9786 100644 --- a/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp +++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.cpp @@ -18,6 +18,7 @@ #include "kernels/Utils.h" +#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h> #include <tensorflow/lite/kernels/internal/reference/pooling.h> #include <stdexcept> @@ -61,11 +62,13 @@ void AveragePool2D::configure() computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width); if (input()->element_type() == DataType::U8) { - if (input()->scale() != output()->scale() || input()->zero_point() != output()->zero_point()) - { - throw std::runtime_error( - "Quantization param for Input and output must be same(scale or zero-point)"); - } + LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6); + LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point()); + } + else if (input()->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6); + LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0); } output()->resize({batches, output_height, output_width, depth}); } @@ -80,6 +83,9 @@ void AveragePool2D::execute() const case DataType::U8: evalQuantized(); break; + case DataType::S16: + evalSInt16(); + break; default: throw std::runtime_error("Unsupported type."); } @@ -126,5 +132,26 @@ void 
AveragePool2D::evalQuantized() const getTensorData<uint8_t>(output())); } +void AveragePool2D::evalSInt16() const +{ + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::PoolParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.filter_height = _params.filter_height; + params.filter_width = _params.filter_width; + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + tflite::reference_integer_ops::AveragePool( + params, getTensorShape(input()), getTensorData<int16_t>(input()), // + getTensorShape(output()), getTensorData<int16_t>(output())); +} + } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.h b/compiler/luci-interpreter/src/kernels/AveragePool2D.h index 91f212b3a..282a58797 100644 --- a/compiler/luci-interpreter/src/kernels/AveragePool2D.h +++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.h @@ -39,6 +39,7 @@ public: private: void evalFloat() const; void evalQuantized() const; + void evalSInt16() const; private: int32_t _padding_height{}; diff --git a/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp b/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp index cc80e5e90..83e48c89d 100644 --- a/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp +++ b/compiler/luci-interpreter/src/kernels/AveragePool2D.test.cpp @@ -53,25 +53,21 @@ TEST(AveragePool2DTest, Float) 0, 1.5, // 4.5, 6, // }; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 1})); } TEST(AveragePool2DTest, Uint8_0) { + std::vector<float> input_data{ + 0, -6, 12, 4, // + -3, -2, 10, 7, // + }; std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375f, 15.9375f); - Tensor input_tensor{DataType::U8, {1, 2, 4, 1}, {{quant_param.first}, {quant_param.second}}, ""}; + Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, + quant_param.second, input_data); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); - std::vector<uint8_t> quant_input = quantize<uint8_t>( - { - 0, -6, 12, 4, // - -3, -2, 10, 7, // - }, - quant_param.first, quant_param.second); - input_tensor.writeData(quant_input.data(), quant_input.size() * sizeof(uint8_t)); - Pool2DParams params{}; params.padding = Padding::VALID; params.filter_height = 2; @@ -84,26 +80,22 @@ TEST(AveragePool2DTest, Uint8_0) kernel.configure(); kernel.execute(); - EXPECT_THAT(dequantize(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(), - output_tensor.zero_point()), - ElementsAreArray(ArrayFloatNear({0.0, 6.0}))); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0.0, 6.0})); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1})); } TEST(AveragePool2DTest, Uint8_1) { + std::vector<float> input_data{ + 0, 6, 12, 4, // + 3, 2, 10, 7, // + }; + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-15.9375f, 15.9375f); - Tensor input_tensor{DataType::U8, {1, 2, 4, 
1}, {{quant_param.first}, {quant_param.second}}, ""}; + Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, + quant_param.second, input_data); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); - std::vector<uint8_t> quant_input = quantize<uint8_t>( - { - 0, 6, 12, 4, // - 3, 2, 10, 7, // - }, - quant_param.first, quant_param.second); - input_tensor.writeData(quant_input.data(), quant_input.size() * sizeof(uint8_t)); - Pool2DParams params{}; params.padding = Padding::VALID; params.filter_height = 2; @@ -116,12 +108,42 @@ TEST(AveragePool2DTest, Uint8_1) kernel.configure(); kernel.execute(); - EXPECT_THAT(dequantize(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(), - output_tensor.zero_point()), - ElementsAreArray(ArrayFloatNear({2.75, 6.0}))); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({2.75, 6.0})); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 1, 2, 1})); } +TEST(AveragePool2DTest, SInt16) +{ + Shape input_shape{1, 3, 5, 1}; + std::vector<int32_t> ref_output_shape{1, 2, 2, 1}; + std::vector<float> input_data{ + -4, -3, -2, -1, 0, // + 1, 2, 3, 4, 5, // + 6, 7, 8, 9, 10, // + }; + std::vector<float> ref_output_data{ + 0, 1.5, // + 4.5, 6, // + }; + Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, 0.5, 0, input_data); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.5, 0); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.filter_height = 2; + params.filter_width = 3; + params.stride_height = 1; + params.stride_width = 2; + params.activation = Activation::RELU6; + + AveragePool2D kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + TEST(AveragePool2DTest, Invalid_Input_Shape_NEG) { Shape input_shape{1, 3, 5}; @@ -170,20 +192,17 @@ TEST(AveragePool2DTest, In_Out_Type_NEG) TEST(AveragePool2DTest, Quant_Param_NEG) { + std::vector<float> input_data{ + 0, -6, 12, 4, // + -3, -2, 10, 7, // + }; + std::pair<float, int32_t> quant_param1 = quantizationParams<uint8_t>(-15.9375f, 15.9375f); std::pair<float, int32_t> quant_param2 = quantizationParams<uint8_t>(-7.875f, 7.875f); - Tensor input_tensor{ - DataType::U8, {1, 2, 4, 1}, {{quant_param1.first}, {quant_param1.second}}, ""}; + Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param1.first, + quant_param1.second, input_data); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param2.first, quant_param2.second); - std::vector<uint8_t> quant_input = quantize<uint8_t>( - { - 0, -6, 12, 4, // - -3, -2, 10, 7, // - }, - quant_param1.first, quant_param1.second); - input_tensor.writeData(quant_input.data(), quant_input.size() * sizeof(uint8_t)); - Pool2DParams params{}; params.padding = Padding::VALID; params.filter_height = 2; diff --git a/compiler/luci-interpreter/src/kernels/BinaryOpCommon.h b/compiler/luci-interpreter/src/kernels/BinaryOpCommon.h new file mode 100644 index 000000000..62bd4158e --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/BinaryOpCommon.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_BINARYOPUTILS_H +#define LUCI_INTERPRETER_KERNELS_BINARYOPUTILS_H + +#include "tensorflow/lite/kernels/internal/common.h" +#include "tensorflow/lite/kernels/internal/types.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +// Derived from tensorflow/lite/kernels/internal/reference/maximum_minimum.h (v2.3.0). +template <typename T, typename Op, int N = 5> +void BinaryOpBroadcastSlow(const tflite::RuntimeShape &unextended_input1_shape, + const T *input1_data, + const tflite::RuntimeShape &unextended_input2_shape, + const T *input2_data, + const tflite::RuntimeShape &unextended_output_shape, T *output_data, + Op op) +{ + if (unextended_input1_shape == unextended_input2_shape) + { + const int flat_size = tflite::MatchingElementsSize( + unextended_input1_shape, unextended_input2_shape, unextended_output_shape); + for (int i = 0; i < flat_size; ++i) + { + output_data[i] = op(input1_data[i], input2_data[i]); + } + } + else + { + assert(unextended_input1_shape.DimensionsCount() <= N); + assert(unextended_input2_shape.DimensionsCount() <= N); + assert(unextended_output_shape.DimensionsCount() <= N); + + tflite::NdArrayDesc<N> desc1{}; + tflite::NdArrayDesc<N> desc2{}; + tflite::NdArrayDesc<N> output_desc{}; + tflite::NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, unextended_input2_shape, + &desc1, &desc2); + tflite::CopyDimsToDesc(tflite::RuntimeShape::ExtendedShape(N, unextended_output_shape), + &output_desc); + + auto fn = [&](int indexes[N]) { + output_data[SubscriptToIndex(output_desc, indexes)] = + op(input1_data[SubscriptToIndex(desc1, indexes)], + input2_data[SubscriptToIndex(desc2, indexes)]); + }; + tflite::NDOpsHelper<N>(output_desc, fn); + } +} + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_BINARYOPUTILS_H diff --git a/compiler/luci-interpreter/src/kernels/CMakeLists.txt b/compiler/luci-interpreter/src/kernels/CMakeLists.txt index 040ac5911..b460321bd 100644 --- a/compiler/luci-interpreter/src/kernels/CMakeLists.txt +++ b/compiler/luci-interpreter/src/kernels/CMakeLists.txt @@ -16,10 +16,22 @@ set(SOURCES DepthToSpace.cpp DepthwiseConv2D.h DepthwiseConv2D.cpp + Div.h + Div.cpp Elu.h Elu.cpp + Floor.h + Floor.cpp + FloorDiv.h + FloorDiv.cpp + Equal.h + Equal.cpp FullyConnected.h FullyConnected.cpp + Greater.h + Greater.cpp + GreaterEqual.h + GreaterEqual.cpp If.h If.cpp L2Normalize.h @@ -28,20 +40,44 @@ set(SOURCES L2Pool2D.cpp LeakyRelu.h LeakyRelu.cpp + Less.h + Less.cpp + LessEqual.h + LessEqual.cpp LocalResponseNormalization.h LocalResponseNormalization.cpp Logistic.h Logistic.cpp + LogSoftmax.h + LogSoftmax.cpp + Maximum.h + Maximum.cpp MaxPool2D.h MaxPool2D.cpp Mean.h Mean.cpp + Minimum.h + Minimum.cpp Mul.h Mul.cpp + NotEqual.h + NotEqual.cpp Pad.h Pad.cpp + Pow.h + Pow.cpp + Prelu.h + Prelu.cpp + Relu.h + Relu.cpp + Relu6.h + Relu6.cpp Reshape.h Reshape.cpp + ResizeBilinear.h + ResizeBilinear.cpp + 
ResizeNearestNeighbor.h + ResizeNearestNeighbor.cpp Reverse.h Reverse.cpp Rsqrt.h @@ -60,6 +96,8 @@ set(SOURCES Sqrt.cpp Squeeze.h Squeeze.cpp + Sub.h + Sub.cpp Tanh.h Tanh.cpp Transpose.h @@ -69,7 +107,11 @@ set(SOURCES Unpack.h Unpack.cpp) -list(APPEND SOURCES Utils.h Utils.cpp ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc) +list(APPEND SOURCES + BinaryOpCommon.h + Utils.h + Utils.cpp + ${TensorFlowSource_DIR}/tensorflow/lite/kernels/internal/quantization_util.cc) add_library(luci_interpreter_kernels STATIC ${SOURCES}) set_target_properties(luci_interpreter_kernels PROPERTIES POSITION_INDEPENDENT_CODE ON) @@ -92,19 +134,37 @@ set(TEST_SOURCES Conv2D.test.cpp DepthToSpace.test.cpp DepthwiseConv2D.test.cpp + Div.test.cpp Elu.test.cpp + Floor.test.cpp + FloorDiv.test.cpp + Equal.test.cpp FullyConnected.test.cpp + Greater.test.cpp + GreaterEqual.test.cpp If.test.cpp L2Normalize.test.cpp L2Pool2D.test.cpp LeakyRelu.test.cpp + Less.test.cpp + LessEqual.test.cpp LocalResponseNormalization.test.cpp Logistic.test.cpp + LogSoftmax.test.cpp + Maximum.test.cpp MaxPool2D.test.cpp Mean.test.cpp + Minimum.test.cpp Mul.test.cpp + NotEqual.test.cpp Pad.test.cpp + Pow.test.cpp + Prelu.test.cpp + Relu.test.cpp + Relu6.test.cpp Reshape.test.cpp + ResizeBilinear.test.cpp + ResizeNearestNeighbor.test.cpp Reverse.test.cpp Rsqrt.test.cpp Slice.test.cpp @@ -114,6 +174,7 @@ set(TEST_SOURCES StridedSlice.test.cpp Sqrt.test.cpp Squeeze.test.cpp + Sub.test.cpp Tanh.test.cpp Transpose.test.cpp TransposeConv.test.cpp diff --git a/compiler/luci-interpreter/src/kernels/Concatenation.cpp b/compiler/luci-interpreter/src/kernels/Concatenation.cpp index 812ab7609..6f8820446 100644 --- a/compiler/luci-interpreter/src/kernels/Concatenation.cpp +++ b/compiler/luci-interpreter/src/kernels/Concatenation.cpp @@ -36,20 +36,20 @@ Concatenation::Concatenation(std::vector<const Tensor *> inputs, Tensor *output, void Concatenation::configure() { const int num_inputs = _inputs.size(); - assert(num_inputs > 0); + LUCI_INTERPRETER_CHECK(num_inputs > 0); const Tensor *t0 = _inputs[0]; int axis = _params.axis; if (axis < 0) axis += t0->shape().num_dims(); - assert(axis >= 0 && axis < t0->shape().num_dims()); + LUCI_INTERPRETER_CHECK(axis >= 0 && axis < t0->shape().num_dims()); int32_t sum_axis = t0->shape().dim(axis); for (int i = 1; i < num_inputs; ++i) { const Tensor *tensor = _inputs[i]; - assert(tensor->element_type() == t0->element_type()); - assert(tensor->shape().num_dims() == t0->shape().num_dims()); + LUCI_INTERPRETER_CHECK(tensor->element_type() == t0->element_type()); + LUCI_INTERPRETER_CHECK(tensor->shape().num_dims() == t0->shape().num_dims()); for (int d = 0; d < t0->shape().num_dims(); ++d) { if (d == axis) @@ -58,7 +58,7 @@ void Concatenation::configure() } else { - assert(tensor->shape().dim(d) == t0->shape().dim(d)); + LUCI_INTERPRETER_CHECK(tensor->shape().dim(d) == t0->shape().dim(d)); } } } diff --git a/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp b/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp index d9a7097d0..91707a256 100644 --- a/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Concatenation.test.cpp @@ -44,7 +44,7 @@ TEST(ConcatenationTest, Float) kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}))); + FloatArrayNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12})); } { params.axis = -2; // Same as '0'. 
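
Concatenation::configure above now validates its inputs with LUCI_INTERPRETER_CHECK instead of assert, which is what lets the _NEG tests added below detect bad configurations via EXPECT_ANY_THROW instead of aborting the process. The macro's real definition is elsewhere in this commit; a minimal stand-in with the same observable behaviour could look like:

```cpp
#include <stdexcept>
#include <string>

// Hypothetical stand-in: evaluate the condition and throw on failure, so negative
// tests can catch the error as an exception rather than an assertion abort.
#define LUCI_INTERPRETER_CHECK(cond)                                        \
  do                                                                        \
  {                                                                         \
    if (!(cond))                                                            \
      throw std::runtime_error(std::string("Check failed: ") + #cond);     \
  } while (false)
```
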
@@ -54,7 +54,7 @@ TEST(ConcatenationTest, Float) kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}))); + FloatArrayNear({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12})); } { params.axis = 1; @@ -64,7 +64,7 @@ TEST(ConcatenationTest, Float) kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}))); + FloatArrayNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12})); } { params.axis = -1; // Same as '1'. @@ -74,10 +74,96 @@ TEST(ConcatenationTest, Float) kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}))); + FloatArrayNear({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12})); } } +TEST(ConcatenationTest, Input_Number_Check_NEG) +{ + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + ConcatenationParams params{}; + + params.axis = -1; + + Concatenation kernel({}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(ConcatenationTest, Invalid_Axis_NEG) +{ + std::vector<float> input1_data{1, 2, 3, 4, 5, 6}; + std::vector<float> input2_data{7, 8, 9, 10, 11, 12}; + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data); + Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input2_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + ConcatenationParams params{}; + + params.axis = -3; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(ConcatenationTest, Mismatching_Input_Type_NEG) +{ + std::vector<float> input1_data{1, 2, 3, 4, 5, 6}; + std::vector<uint8_t> input2_data{7, 8, 9, 10, 11, 12}; + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data); + Tensor input2_tensor = makeInputTensor<DataType::U8>({2, 3}, input2_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + ConcatenationParams params{}; + + params.axis = -1; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(ConcatenationTest, Mismatching_Input_Dimension_Num_NEG) +{ + std::vector<float> input1_data{1, 2, 3, 4, 5, 6}; + std::vector<float> input2_data{7, 8, 9, 10, 11, 12}; + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data); + Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({1, 2, 3}, input2_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + ConcatenationParams params{}; + + params.axis = -1; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(ConcatenationTest, Mismatching_Input_Dimension_NEG) +{ + std::vector<float> input1_data{1, 2, 3, 4, 5, 6}; + std::vector<float> input2_data{7, 8, 9, 10, 11, 12, 13, 14, 15}; + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input1_data); + Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, input2_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + ConcatenationParams params{}; + + params.axis = -1; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(ConcatenationTest, Unsupported_Configure_Type_NEG) +{ + std::vector<int8_t> input1_data{1, 2, 3, 4, 5, 6}; + std::vector<int8_t> 
input2_data{7, 8, 9, 10, 11, 12}; + Tensor input1_tensor = makeInputTensor<DataType::S8>({2, 3}, input1_data); + Tensor input2_tensor = makeInputTensor<DataType::S8>({2, 3}, input2_data); + Tensor output_tensor = makeOutputTensor(DataType::S8); + ConcatenationParams params{}; + + params.axis = -1; + + Concatenation kernel({&input1_tensor, &input2_tensor}, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp index 0446d9760..be8364528 100644 --- a/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Conv2D.test.cpp @@ -66,8 +66,7 @@ TEST(Conv2DTest, Float) 0, 40, 0, 44, // row = 1 }; std::vector<int32_t> ref_output_shape{1, 2, 2, 2}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } @@ -114,46 +113,38 @@ TEST(Conv2DTest, FloatCheck) 37, 4, 3, // second batch, right }; std::vector<int32_t> ref_output_shape{2, 1, 2, 3}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } TEST(Conv2DTest, Uint8) { + std::vector<float> input_data{ + // First batch + 1, 1, 1, 1, // row = 1 + 2, 2, 2, 2, // row = 2 + // Second batch + 1, 2, 3, 4, // row = 1 + 1, 2, 3, 4, // row = 2 + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // first 2x2 filter + -1, 1, -1, 1, // second 2x2 filter + -1, -1, 1, 1, // third 2x2 filter + }; + std::vector<float> bias_data{1, 2, 3}; + std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64); std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128); - Shape bias_shape = {3}; - Tensor input_tensor{ - DataType::U8, {2, 2, 4, 1}, {{input_quant_param.first}, {input_quant_param.second}}, ""}; - Tensor filter_tensor{ - DataType::U8, {3, 2, 2, 1}, {{input_quant_param.first}, {input_quant_param.second}}, ""}; - Tensor bias_tensor{ - DataType::S32, bias_shape, {{input_quant_param.first * input_quant_param.first}, {0}}, ""}; + + Tensor input_tensor = makeInputTensor<DataType::U8>({2, 2, 4, 1}, input_quant_param.first, + input_quant_param.second, input_data); + Tensor filter_tensor = makeInputTensor<DataType::U8>({3, 2, 2, 1}, input_quant_param.first, + input_quant_param.second, filter_data); + Tensor bias_tensor = makeInputTensor<DataType::S32>( + {3}, input_quant_param.first * input_quant_param.first, 0, bias_data); Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); - std::vector<uint8_t> quantized_input = quantize<uint8_t>( - { - // First batch - 1, 1, 1, 1, // row = 1 - 2, 2, 2, 2, // row = 2 - // Second batch - 1, 2, 3, 4, // row = 1 - 1, 2, 3, 4, // row = 2 - }, - input_quant_param.first, input_quant_param.second); - std::vector<uint8_t> quantized_filter = quantize<uint8_t>( - { - 1, 2, 3, 4, // first 2x2 filter - -1, 1, -1, 1, // second 2x2 filter - -1, -1, 1, 1, // third 2x2 filter - }, - input_quant_param.first, input_quant_param.second); 
- std::vector<int32_t> bias_data = - quantize<int32_t>({1, 2, 3}, input_quant_param.first * input_quant_param.first, 0); - input_tensor.writeData(quantized_input.data(), quantized_input.size() * sizeof(uint8_t)); - filter_tensor.writeData(quantized_filter.data(), quantized_filter.size() * sizeof(uint8_t)); - bias_tensor.writeData(bias_data.data(), bias_data.size() * sizeof(int32_t)); Conv2DParams params{}; params.padding = Padding::VALID; @@ -174,9 +165,7 @@ TEST(Conv2DTest, Uint8) 37, 4, 3, // second batch, right }; std::vector<int32_t> ref_output_shape{2, 1, 2, 3}; - EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor), - output_quant_param.first, output_quant_param.second), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } diff --git a/compiler/luci-interpreter/src/kernels/DepthToSpace.cpp b/compiler/luci-interpreter/src/kernels/DepthToSpace.cpp index cab63e26d..57238313c 100644 --- a/compiler/luci-interpreter/src/kernels/DepthToSpace.cpp +++ b/compiler/luci-interpreter/src/kernels/DepthToSpace.cpp @@ -30,20 +30,10 @@ DepthToSpace::DepthToSpace(const Tensor *input, Tensor *output, const DepthToSpa void DepthToSpace::configure() { - if (input()->shape().num_dims() != 4) - { - throw std::runtime_error("Invalid input num_dims."); - } - if (output()->element_type() != DataType::FLOAT32 && output()->element_type() != DataType::U8 && - output()->element_type() != DataType::S8 && output()->element_type() != DataType::S32 && - output()->element_type() != DataType::S64) - { - throw std::runtime_error("Invalid output type"); - } - if (input()->element_type() != output()->element_type()) - { - throw std::runtime_error("Type mismatch on input and output."); - } + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32 || + output()->element_type() == DataType::U8) + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()) const int block_size = params().block_size; const int32_t input_height = input()->shape().dim(1); const int32_t input_width = input()->shape().dim(2); @@ -52,9 +42,9 @@ void DepthToSpace::configure() int32_t output_width = input_width * block_size; int32_t output_channels = input_channels / block_size / block_size; - assert(input_height == output_height / block_size); - assert(input_width == output_width / block_size); - assert(input_channels == output_channels * block_size * block_size); + LUCI_INTERPRETER_CHECK(input_height == output_height / block_size); + LUCI_INTERPRETER_CHECK(input_width == output_width / block_size); + LUCI_INTERPRETER_CHECK(input_channels == output_channels * block_size * block_size); Shape output_shape(4); output_shape.dim(0) = input()->shape().dim(0); diff --git a/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp b/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp index 1b805702d..3dee4ad36 100644 --- a/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp +++ b/compiler/luci-interpreter/src/kernels/DepthToSpace.test.cpp @@ -55,6 +55,51 @@ TYPED_TEST(DepthToSpaceTest, SimpleCase) EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); } +TEST(DepthToSpaceTest, InvalidInputShape_NEG) +{ + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8}; + Shape input_shape{1, 2, 4}; + + Tensor input_tensor = 
makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + DepthToSpaceParams params{}; + params.block_size = 2; + + DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(DepthToSpaceTest, InOutTypeMismatch_NEG) +{ + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8}; + Shape input_shape{1, 1, 2, 4}; + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + DepthToSpaceParams params{}; + params.block_size = 2; + + DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(DepthToSpaceTest, InvalidBlockSize_NEG) +{ + std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8}; + Shape input_shape{1, 1, 2, 4}; + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + DepthToSpaceParams params{}; + params.block_size = 3; + + DepthToSpace kernel = DepthToSpace(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp index b01a5e086..99d52715b 100644 --- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp +++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp @@ -48,33 +48,33 @@ void DepthwiseConv2D::configure() // We only support (1) and (3) for now. if (input()->element_type() == DataType::FLOAT32 && filter()->element_type() == DataType::FLOAT32) { - assert(bias() == nullptr || bias()->element_type() == DataType::FLOAT32); + LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::FLOAT32); } else if (input()->element_type() == DataType::U8 && filter()->element_type() == DataType::U8) { - assert(bias() == nullptr || bias()->element_type() == DataType::S32); + LUCI_INTERPRETER_CHECK(bias() == nullptr || bias()->element_type() == DataType::S32); } else { throw std::runtime_error("Unsupported type."); } - assert(output()->element_type() == input()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == input()->element_type()); const Shape &input_shape = input()->shape(); const Shape &filter_shape = filter()->shape(); - assert(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4); + LUCI_INTERPRETER_CHECK(input_shape.num_dims() == 4 && filter_shape.num_dims() == 4); const int32_t batches = input_shape.dim(0); const int32_t input_height = input_shape.dim(1); const int32_t input_width = input_shape.dim(2); // Filter format: [1, H, W, O]. 
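
configure() goes on to derive the output height and width from the padding mode, strides, and dilation via computeOutputSize before resizing the output tensor. Below is a sketch of the conventional TFLite-style size computation that helper is assumed to follow; the project's actual helper lives in kernels/Utils.h, and the local Padding enum here is only for illustration.

```cpp
#include <cstdint>

enum class Padding { SAME, VALID };

// Assumed behaviour of computeOutputSize: SAME padding ceil-divides the input extent
// by the stride; VALID padding fits whole (dilated) filter windows only.
int32_t computeOutputSizeSketch(Padding padding, int32_t image_size, int32_t filter_size,
                                int32_t stride, int32_t dilation = 1)
{
  const int32_t effective_filter_size = (filter_size - 1) * dilation + 1;
  switch (padding)
  {
    case Padding::SAME:
      return (image_size + stride - 1) / stride;
    case Padding::VALID:
      return (image_size - effective_filter_size + stride) / stride;
  }
  return 0;
}
```
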
- assert(filter_shape.dim(0) == 1); + LUCI_INTERPRETER_CHECK(filter_shape.dim(0) == 1); const int32_t filter_height = filter_shape.dim(1); const int32_t filter_width = filter_shape.dim(2); const int32_t channels_out = filter_shape.dim(3); - assert(bias() == nullptr || - (bias()->shape().num_dims() == 1 && bias()->shape().dim(0) == channels_out)); + LUCI_INTERPRETER_CHECK(bias() == nullptr || (bias()->shape().num_dims() == 1 && + bias()->shape().dim(0) == channels_out)); const int32_t output_height = computeOutputSize(_params.padding, input_height, filter_height, _params.stride_height, diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp index a9b43d864..a5128289f 100644 --- a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp +++ b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.test.cpp @@ -66,47 +66,37 @@ TEST(DepthwiseConv2DTest, Float) 71, 0, 99, 0, // 167, 0, 227, 28, // }; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4})); } TEST(DepthwiseConv2DTest, Uint8) { + std::vector<float> input_data{ + 1, 2, 7, 8, // column 1 + 3, 4, 9, 10, // column 2 + 5, 6, 11, 12, // column 3 + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<float> bias_data{1, 2, 3, 4}; + std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64); std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128); - Tensor input_tensor{ - DataType::U8, {1, 3, 2, 2}, {{input_quant_param.first}, {input_quant_param.second}}, ""}; - Tensor filter_tensor{ - DataType::U8, {1, 2, 2, 4}, {{input_quant_param.first}, {input_quant_param.second}}, ""}; - Tensor bias_tensor{ - DataType::S32, {4}, {{input_quant_param.first * input_quant_param.first}, {0}}, ""}; + Tensor input_tensor = makeInputTensor<DataType::U8>({1, 3, 2, 2}, input_quant_param.first, + input_quant_param.second, input_data); + Tensor filter_tensor = makeInputTensor<DataType::U8>({1, 2, 2, 4}, input_quant_param.first, + input_quant_param.second, filter_data); + Tensor bias_tensor = makeInputTensor<DataType::S32>( + {4}, input_quant_param.first * input_quant_param.first, 0, bias_data); Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); - std::vector<uint8_t> quant_input = quantize<uint8_t>( - { - 1, 2, 7, 8, // column 1 - 3, 4, 9, 10, // column 2 - 5, 6, 11, 12, // column 3 - }, - input_quant_param.first, input_quant_param.second); - std::vector<uint8_t> quant_filter = quantize<uint8_t>( - { - 1, 2, 3, 4, // - -9, 10, -11, 12, // - 5, 6, 7, 8, // - 13, -14, 15, -16, // - }, - input_quant_param.first, input_quant_param.second); - std::vector<int32_t> quant_bias = - quantize<int32_t>({1, 2, 3, 4}, input_quant_param.first * input_quant_param.first, 0); - - input_tensor.writeData(quant_input.data(), quant_input.size() * sizeof(uint8_t)); - filter_tensor.writeData(quant_filter.data(), quant_filter.size() * sizeof(uint8_t)); - bias_tensor.writeData(quant_bias.data(), quant_bias.size() * sizeof(int32_t)); - DepthwiseConv2DParams params{}; params.padding = Padding::VALID; params.depth_multiplier = 2; @@ -124,12 +114,190 @@ TEST(DepthwiseConv2DTest, 
Uint8) 71, -34, 99, -20, // 91, -26, 127, -4, // }; - EXPECT_THAT(dequantize(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(), - output_tensor.zero_point()), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 1, 4})); } +TEST(DepthwiseConv2DTest, InvalidBiasType_NEG) +{ + Shape input_shape{1, 4, 2, 2}; + Shape filter_shape{1, 2, 2, 4}; + Shape bias_shape{4}; + std::vector<float> input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<int32_t> bias_data{1, 2, 3, 4}; + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); + Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(DepthwiseConv2DTest, InOutTypeMismatch_NEG) +{ + Shape input_shape{1, 4, 2, 2}; + Shape filter_shape{1, 2, 2, 4}; + Shape bias_shape{4}; + std::vector<float> input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<float> bias_data{1, 2, 3, 4}; + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); + Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(DepthwiseConv2DTest, InvalidInputShape_NEG) +{ + Shape input_shape{4, 2, 2}; + Shape filter_shape{2, 2, 4}; + Shape bias_shape{4}; + std::vector<float> input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<float> bias_data{1, 2, 3, 4}; + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); + Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + 
params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(DepthwiseConv2DTest, InvalidFilterShape_NEG) +{ + Shape input_shape{1, 4, 2, 2}; + Shape filter_shape{2, 1, 2, 4}; + Shape bias_shape{4}; + std::vector<float> input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<float> bias_data{1, 2, 3, 4}; + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); + Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(DepthwiseConv2DTest, InvalidBiasDim_NEG) +{ + Shape input_shape{1, 4, 2, 2}; + Shape filter_shape{1, 2, 4, 2}; + Shape bias_shape{4}; + std::vector<float> input_data{ + 1, 2, 7, 8, // + 3, 4, 9, 10, // + 5, 6, 11, 12, // + 13, 14, 15, 16, // + }; + std::vector<float> filter_data{ + 1, 2, 3, 4, // + -9, 10, -11, 12, // + 5, 6, 7, 8, // + 13, -14, 15, -16, // + }; + std::vector<float> bias_data{1, 2, 3, 4}; + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor filter_tensor = makeInputTensor<DataType::FLOAT32>(filter_shape, filter_data); + Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + DepthwiseConv2DParams params{}; + params.padding = Padding::VALID; + params.depth_multiplier = 2; + params.stride_height = 2; + params.stride_width = 1; + params.dilation_height_factor = 1; + params.dilation_width_factor = 1; + params.activation = Activation::RELU; + + DepthwiseConv2D kernel(&input_tensor, &filter_tensor, &bias_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Div.cpp b/compiler/luci-interpreter/src/kernels/Div.cpp new file mode 100644 index 000000000..e75876b3a --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Div.cpp @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Div.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> + +namespace luci_interpreter +{ +namespace kernels +{ + +Div::Div(const Tensor *input1, const Tensor *input2, Tensor *output, const DivParams ¶ms) + : KernelWithParams<DivParams>({input1, input2}, {output}, params) +{ +} + +void Div::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()); + LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type()); + + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void Div::execute() const +{ + switch (input1()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Div::evalFloat() const +{ + float activation_min{}; + float activation_max{}; + calculateActivationRange(_params.activation, &activation_min, &activation_max); + + tflite::ArithmeticParams params{}; + params.float_activation_min = activation_min; + params.float_activation_max = activation_max; + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastDivSlow( + params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()), + getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output())); + } + else + { + tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<float>(input1()), + getTensorShape(input2()), getTensorData<float>(input2()), + getTensorShape(output()), getTensorData<float>(output())); + } +} + +void Div::evalQuantized() const +{ + const auto input1_scale = static_cast<double>(input1()->scale()); + const auto input2_scale = static_cast<double>(input2()->scale()); + const auto output_scale = static_cast<double>(output()->scale()); + + const double real_output_multiplier = input1_scale / (input2_scale * output_scale); + + int32_t output_multiplier{}; + int output_shift{}; + + quantizeMultiplier(real_output_multiplier, &output_multiplier, &output_shift); + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::ArithmeticParams params{}; + + params.input1_offset = -input1()->zero_point(); // Note the '-'. + params.input2_offset = -input2()->zero_point(); // Note the '-'. 
+ params.output_offset = output()->zero_point(); + params.output_multiplier = output_multiplier; + params.output_shift = output_shift; + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastDivSlow( + params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), + getTensorShape(input2()), getTensorData<uint8_t>(input2()), getTensorShape(output()), + getTensorData<uint8_t>(output())); + } + else + { + tflite::reference_ops::Div(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), + getTensorShape(input2()), getTensorData<uint8_t>(input2()), + getTensorShape(output()), getTensorData<uint8_t>(output())); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Div.h b/compiler/luci-interpreter/src/kernels/Div.h new file mode 100644 index 000000000..6040cdd02 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Div.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_DIV_H +#define LUCI_INTERPRETER_KERNELS_DIV_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Div : public KernelWithParams<DivParams> +{ +public: + Div(const Tensor *input1, const Tensor *input2, Tensor *output, const DivParams ¶ms); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_DIV_H diff --git a/compiler/luci-interpreter/src/kernels/Div.test.cpp b/compiler/luci-interpreter/src/kernels/Div.test.cpp new file mode 100644 index 000000000..77eb2e9c1 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Div.test.cpp @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Div.h" +#include "kernels/TestUtils.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +float GetTolerance(float min, float max) +{ + const float kQuantizedStep = (max - min) / 255.0f; + const float kQuantizedTolerance = 2.0f * kQuantizedStep + kQuantizedStep * kQuantizedStep; + return kQuantizedTolerance; +} + +TEST(DivTest, Float) +{ + Shape base_shape = {2, 3, 1, 1}; + + std::vector<int32_t> output_shape = {2, 3, 1, 1}; + + std::vector<float> input1_data{0.3f, 2.3f, 0.9f, 0.5f, 0.8f, 1.1f}; + std::vector<float> input2_data{0.2f, 1.6f, 0.5f, 0.4f, 1.6f, 0.4f}; + std::vector<float> test_outputs{1.5f, 1.4375f, 1.8f, 1.25f, 0.5f, 2.75f}; + + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data); + Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input2_data); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +TEST(DivTest, FloatBroadcast) +{ + Shape input1_shape = {1, 3}; + Shape input2_shape = {3, 1}; + + std::vector<float> input1_data{-0.3f, 2.3f, 0.9f}; + std::vector<float> input2_data{0.2f, 1.6f, 0.5f}; + std::vector<float> test_outputs{0.f, 11.5f, 4.5f, 0.f, 1.4375f, 0.5625f, 0.f, 4.6f, 1.8f}; + + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data); + Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data); + + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f)); +} + +TEST(DivTest, Uint8) +{ + Shape base_shape = {1, 2, 2, 1}; + + std::vector<int32_t> output_shape = {1, 2, 2, 1}; + + std::vector<float> input1_data = {-0.8f, -0.2f, 0.3f, 0.7f}; + std::vector<float> input2_data = {-0.8f, 0.4f, 0.8f, 1.0f}; + std::vector<float> test_outputs{1.0f, 0.f, 0.375f, 0.7f}; + + const float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.f, 1.f); + + Tensor input1_tensor = + makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, input1_data); + Tensor input2_tensor = + makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, input2_data); + + Tensor output_tensor = + makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(test_outputs, kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +TEST(DivTest, Input_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}); + Tensor output_tensor = 
makeOutputTensor(DataType::FLOAT32); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(DivTest, Invalid_Input_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}); + Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + DivParams params{}; + params.activation = Activation::RELU; + + Div kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + EXPECT_ANY_THROW(kernel.execute()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Elu.cpp b/compiler/luci-interpreter/src/kernels/Elu.cpp index 5de4a1f3b..456396055 100644 --- a/compiler/luci-interpreter/src/kernels/Elu.cpp +++ b/compiler/luci-interpreter/src/kernels/Elu.cpp @@ -31,7 +31,7 @@ Elu::Elu(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} void Elu::configure() { - assert(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); output()->resize(input()->shape()); } diff --git a/compiler/luci-interpreter/src/kernels/Elu.test.cpp b/compiler/luci-interpreter/src/kernels/Elu.test.cpp index 52444cbea..0235d6552 100644 --- a/compiler/luci-interpreter/src/kernels/Elu.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Elu.test.cpp @@ -29,9 +29,7 @@ using namespace testing; void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data, std::initializer_list<float> output_data) { - Tensor input_tensor{DataType::FLOAT32, input_shape, {}, ""}; - input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(float)); - + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Elu kernel(&input_tensor, &output_tensor); @@ -39,8 +37,7 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int kernel.execute(); (void)output_shape; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ::testing::ElementsAreArray(ArrayFloatNear(output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data)); } TEST(EluTest, SimpleElu) @@ -59,6 +56,20 @@ TEST(EluTest, SimpleElu) }); } +TEST(EluTest, InOutTypeMismatch_NEG) +{ + Shape input_shape{1, 2, 4, 1}; + std::vector<float> input_data{ + 0, -6, 2, -4, // + 3, -2, 10, -0.1, // + }; + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Elu kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Equal.cpp b/compiler/luci-interpreter/src/kernels/Equal.cpp new file mode 100644 index 000000000..f58de1250 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Equal.cpp @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Equal.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/comparisons.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Equal::Equal(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {} + +void Equal::configure() +{ + LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL); + + if (x()->element_type() == DataType::U8) + { + quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift); + quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift); + } + output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); +} + +void Equal::execute() const +{ + switch (x()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Equal::evalFloat() const +{ + const auto x_data = getTensorData<float>(x()); + const auto y_data = getTensorData<float>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowEqual(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::Equal(op_params, getTensorShape(x()), x_data, getTensorShape(y()), + y_data, getTensorShape(output()), output_data); + } +} + +void Equal::evalQuantized() const +{ + const auto x_data = getTensorData<uint8_t>(x()); + const auto y_data = getTensorData<uint8_t>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.left_shift = 8; + op_params.input1_offset = -x()->zero_point(); // Note the '-' + op_params.input1_shift = _x_shift; + op_params.input1_multiplier = _x_multiplier; + op_params.input2_offset = -y()->zero_point(); // Note the '-' + op_params.input2_shift = _y_shift; + op_params.input2_multiplier = _y_multiplier; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowEqualWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::EqualWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Equal.h b/compiler/luci-interpreter/src/kernels/Equal.h new file mode 100644 index 000000000..69b3be774 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Equal.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_EQUAL_H +#define LUCI_INTERPRETER_KERNELS_EQUAL_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Equal : public Kernel +{ +public: + Equal(const Tensor *x, const Tensor *y, Tensor *output); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + +private: + int32_t _x_multiplier = 0; + int32_t _x_shift = 0; + int32_t _y_multiplier = 0; + int32_t _y_shift = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_EQUAL_H diff --git a/compiler/luci-interpreter/src/kernels/Equal.test.cpp b/compiler/luci-interpreter/src/kernels/Equal.test.cpp new file mode 100644 index 000000000..fb0de8bbf --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Equal.test.cpp @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Equal.h" +#include "kernels/TestUtils.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +TEST(EqualTest, FloatSimple) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + -1, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + false, true, false, // Row 1 + false, true, false, // Row 2 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST(EqualTest, FloatBroardcast) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + -1, 0, 1, // Row 3 + 0.9, 0.7, 0.5, // Row 4 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, true, false, // Row 1 + false, false, false, // Row 2 + false, false, false, // Row 3 + true, true, true, // Row 4 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
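+// A quick arithmetic check of that choice (illustrative, assuming the usual uint8 affine
+// scale formula scale = (max - min) / 255): F_MAX - F_MIN = 255/128, so the scale is
+// exactly 1/128 -- a power of two that floats represent exactly, which keeps the
+// quantization grid itself free of rounding error.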
+const float F_MIN = -128.0 / 128.0; +const float F_MAX = 127.0 / 128.0; + +TEST(EqualTest, Uint8Quantized) +{ + std::vector<float> x_data{ + 0.5, 0.5, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.5, 0.55, 0.5, // Row 1 + -1, 0, 0.05, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + false, true, false, false, // Row 1 + false, true, true, false, // Row 2 + }; + + std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, + x_quant_param.second, x_data); + + std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 2); + Tensor y_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, + y_quant_param.second, y_data); + + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST(EqualTest, Uint8QuantizedBroadcast) +{ + std::vector<float> x_data{ + 0.4, -0.8, 0.7, 0.3, // Row 1 + -0.5, 0.1, 0, 0.5, // Row 2 + 1, 0, 0.05, -1, // Row 3 + -1, 0.05, 0, 1, // Row 4 + }; + + std::vector<float> y_data{ + -1, 0.05, 0, 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, false, false, false, // Row 1 + false, false, true, false, // Row 2 + false, false, false, false, // Row 3 + true, true, true, true, // Row 4 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = + makeInputTensor<DataType::U8>({1, 4, 4, 1}, quant_param.first, quant_param.second, x_data); + Tensor y_tensor = + makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST(EqualTest, Input_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(EqualTest, Input_Output_Type_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Equal kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Floor.cpp b/compiler/luci-interpreter/src/kernels/Floor.cpp new file mode 100644 index 000000000..e3c4246cc --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Floor.cpp @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Floor.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/floor.h> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Floor::Floor(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Floor::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + output()->resize(input()->shape()); +} + +void Floor::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Floor::evalFloat() const +{ + tflite::reference_ops::Floor(getTensorShape(input()), getTensorData<float>(input()), + getTensorShape(output()), getTensorData<float>(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Floor.h b/compiler/luci-interpreter/src/kernels/Floor.h new file mode 100644 index 000000000..ca3ad5997 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Floor.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_FLOOR_H +#define LUCI_INTERPRETER_KERNELS_FLOOR_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Floor : public Kernel +{ +public: + Floor(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_FLOOR_H diff --git a/compiler/luci-interpreter/src/kernels/Floor.test.cpp b/compiler/luci-interpreter/src/kernels/Floor.test.cpp new file mode 100644 index 000000000..3e1ab6f3a --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Floor.test.cpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Floor.h" +#include "kernels/TestUtils.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +TEST(FloorTest, SimpleFloat) +{ + std::initializer_list<int32_t> input_shape{1, 2, 4, 1}; + std::vector<float> input_data{ + 0.2, 8.6, 2.4, 4.3, // Row 1 + 3, 7.1, 10.5, -0.9, // Row 2 + }; + + std::initializer_list<int32_t> ref_output_shape{1, 2, 4, 1}; + std::vector<float> ref_output_data{ + 0, 8, 2, 4, // Row 1 + 3, 7, 10, -1, // Row 2 + }; + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Floor kernel(&input_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST(FloorTest, Input_Output_Type_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + Floor kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/FloorDiv.cpp b/compiler/luci-interpreter/src/kernels/FloorDiv.cpp new file mode 100644 index 000000000..b6f36cea3 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/FloorDiv.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/FloorDiv.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/binary_function.h> +#include <cmath> + +namespace luci_interpreter +{ + +namespace kernels +{ + +FloorDiv::FloorDiv(const Tensor *input, const Tensor *alpha, Tensor *output) + : Kernel({input, alpha}, {output}) +{ +} + +void FloorDiv::configure() +{ + LUCI_INTERPRETER_CHECK(x()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(y()->element_type() == output()->element_type()); + + output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); +} + +void FloorDiv::execute() const +{ + switch (x()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void FloorDiv::evalFloat() const +{ + auto FloorDivFunc = [](float x, float y) -> float { + return std::floor(static_cast<double>(x) / static_cast<double>(y)); + }; + + const auto x_data = getTensorData<float>(x()); + const auto y_data = getTensorData<float>(y()); + + // Check the denominator + for (int i = 0; i < getTensorShape(y()).FlatSize(); ++i) + { + LUCI_INTERPRETER_CHECK(y_data[i] != 0); + } + + if (x()->shape() != y()->shape()) + { + tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>( + getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()), + getTensorData<float>(output()), FloorDivFunc); + } + else + { + tflite::reference_ops::BinaryFunction<float, float, float>( + getTensorShape(x()), x_data, getTensorShape(y()), y_data, getTensorShape(output()), + getTensorData<float>(output()), FloorDivFunc); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/FloorDiv.h b/compiler/luci-interpreter/src/kernels/FloorDiv.h new file mode 100644 index 000000000..e9c47d81a --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/FloorDiv.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H +#define LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class FloorDiv : public Kernel +{ +public: + FloorDiv(const Tensor *x, const Tensor *y, Tensor *output); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_FLOOR_DIV_H diff --git a/compiler/luci-interpreter/src/kernels/FloorDiv.test.cpp b/compiler/luci-interpreter/src/kernels/FloorDiv.test.cpp new file mode 100644 index 000000000..a5bc700f7 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/FloorDiv.test.cpp @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/FloorDiv.h" +#include "kernels/TestUtils.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +TEST(FloorDivTest, FloatSimple) +{ + Shape x_shape{2, 3}; + std::vector<float> x_data{ + 0.5, 2.4, 3.1, // Row 1 + 1.9, -1.9, -2.8, // Row 2 + }; + + Shape y_shape = x_shape; + std::vector<float> y_data{ + 2.0, 0.5, 3.0, // Row 1 + 1.0, -1.0, -2.0, // Row 2 + }; + + std::vector<int32_t> ref_output_shape{2, 3}; + std::vector<float> ref_output_data{ + 0, 4, 1, // Row 1 + 1, 1, 1, // Row 2 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST(FloorDivTest, FloatBroadcast) +{ + Shape x_shape{1, 3}; + std::vector<float> x_data{ + 0.5, 2.4, -3.1, // Row 1 + }; + + Shape y_shape{3, 3}; + std::vector<float> y_data{ + 1.0, 1.0, 1.0, // Row 1 + 2.0, -0.5, -2.0, // Row 2 + 0.3, 0.7, 0.9, // Row 3 + }; + + std::vector<int32_t> ref_output_shape{3, 3}; + std::vector<float> ref_output_data{ + 0, 2, -4, // Row 1 + 0, -5, 1, // Row 2 + 1, 3, -4, // Row 3 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(x_shape, x_data); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(y_shape, y_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + 
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); +} + +TEST(FloorDivTest, DivByZero_NEG) +{ + Shape shape{3}; + std::vector<float> x_data{1, 0, -1}; + std::vector<float> y_data{0, 0, 0}; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>(shape, x_data); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>(shape, y_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + + EXPECT_ANY_THROW(kernel.execute()); +} + +TEST(FloorDivTest, Input_Output_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(FloorDivTest, Input_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1}); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + FloorDiv kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/FullyConnected.cpp b/compiler/luci-interpreter/src/kernels/FullyConnected.cpp index 6529c5e77..7fa76d5e7 100644 --- a/compiler/luci-interpreter/src/kernels/FullyConnected.cpp +++ b/compiler/luci-interpreter/src/kernels/FullyConnected.cpp @@ -36,27 +36,54 @@ FullyConnected::FullyConnected(const Tensor *input, const Tensor *weights, const void FullyConnected::configure() { - if (weights()->element_type() != DataType::FLOAT32) + if (weights()->element_type() == DataType::U8) + { + LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::U8); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::U8); + LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::S32) + } + else if (weights()->element_type() == DataType::FLOAT32) + { + LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32); + LUCI_INTERPRETER_CHECK(!bias() || bias()->element_type() == DataType::FLOAT32) + } + else + { throw std::runtime_error("Unsupported type."); - - assert(input()->element_type() == DataType::FLOAT32); - assert(weights()->element_type() == DataType::FLOAT32); - assert(bias() == nullptr || bias()->element_type() == DataType::FLOAT32); + } const Shape &input_shape = input()->shape(); const Shape &weights_shape = weights()->shape(); - assert(weights_shape.num_dims() == 2); - assert(bias() == nullptr || bias()->shape().num_elements() == weights_shape.dim(0)); + LUCI_INTERPRETER_CHECK(weights_shape.num_dims() == 2); + LUCI_INTERPRETER_CHECK(bias() == nullptr || + bias()->shape().num_elements() == weights_shape.dim(0)); - assert(input_shape.num_elements() % weights_shape.dim(1) == 0); + LUCI_INTERPRETER_CHECK(input_shape.num_elements() % weights_shape.dim(1) == 0); const int32_t batch_size = input_shape.num_elements() / weights_shape.dim(1); const int32_t num_units = weights_shape.dim(0); + if (bias()) + LUCI_INTERPRETER_CHECK(bias()->shape().num_elements() == weights()->shape().dim(0)); + output()->resize({batch_size, num_units}); } -void FullyConnected::execute() const { evalFloat(); } +void FullyConnected::execute() const +{ + switch 
(input()->element_type()) + { + case DataType::U8: + evalQuantized(); + break; + case DataType::FLOAT32: + evalFloat(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} void FullyConnected::evalFloat() const { @@ -75,5 +102,38 @@ void FullyConnected::evalFloat() const getTensorShape(output()), getTensorData<float>(output())); } +void FullyConnected::evalQuantized() const +{ + double real_multiplier = 0.0; + int output_shift; + int32_t output_activation_min; + int32_t output_activation_max; + int32_t output_multiplier; + real_multiplier = + getQuantizedConvolutionMultipler(input()->scale(), weights()->scale(), output()->scale()); + quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift); + calculateActivationRangeQuantized(params().activation, output(), &output_activation_min, + &output_activation_max); + + int32_t input_offset = -input()->zero_point(); + int32_t filter_offset = -weights()->zero_point(); + int32_t output_offset = output()->zero_point(); + + tflite::FullyConnectedParams op_params{}; + op_params.input_offset = input_offset; + op_params.weights_offset = filter_offset; + op_params.output_offset = output_offset; + op_params.output_multiplier = output_multiplier; + op_params.output_shift = output_shift; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + op_params.lhs_cacheable = false; + op_params.rhs_cacheable = false; + tflite::reference_ops::FullyConnected( + op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), + getTensorShape(weights()), getTensorData<uint8_t>(weights()), getTensorShape(bias()), + getTensorData<int32_t>(bias()), getTensorShape(output()), getTensorData<uint8_t>(output())); +} + } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/FullyConnected.h b/compiler/luci-interpreter/src/kernels/FullyConnected.h index 2e3174c74..204f11ebb 100644 --- a/compiler/luci-interpreter/src/kernels/FullyConnected.h +++ b/compiler/luci-interpreter/src/kernels/FullyConnected.h @@ -41,6 +41,7 @@ public: private: void evalFloat() const; + void evalQuantized() const; }; } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp b/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp index 8077fcb5c..d194ce1a0 100644 --- a/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp +++ b/compiler/luci-interpreter/src/kernels/FullyConnected.test.cpp @@ -26,7 +26,85 @@ namespace using namespace testing; -TEST(FullyConnectedTest, Float) +template <typename T> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape, + std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, std::initializer_list<float> weights_data, + std::initializer_list<float> bias_data, std::initializer_list<float> output_data) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor weights_tensor = makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data); + Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + FullyConnectedParams params{}; + params.activation = Activation::RELU; + + FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params); + kernel.configure(); + kernel.execute(); + + 
EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data)); +} + +template <> +void Check<uint8_t>( + std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> weights_shape, + std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, std::initializer_list<float> weights_data, + std::initializer_list<float> bias_data, std::initializer_list<float> output_data) +{ + const float quantized_tolerance = getTolerance(-127, 128, 255); + std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-63.5, 64); + std::pair<float, int32_t> output_quant_param = quantizationParams<uint8_t>(-127, 128); + Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, + input_quant_param.second, input_data); + Tensor weights_tensor = makeInputTensor<DataType::U8>(weights_shape, input_quant_param.first, + input_quant_param.second, weights_data); + Tensor bias_tensor = makeInputTensor<DataType::S32>( + bias_shape, input_quant_param.first * input_quant_param.first, 0, bias_data); + Tensor output_tensor = + makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); + + FullyConnectedParams params{}; + params.activation = Activation::RELU; + + FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, quantized_tolerance)); +} + +template <typename T> class FullyConnectedTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t>; +TYPED_TEST_CASE(FullyConnectedTest, DataTypes); + +TYPED_TEST(FullyConnectedTest, Simple) +{ + Check<TypeParam>({3, 2, 2, 1}, {3, 6}, {3}, {2, 3}, + { + -3, -5, 5, 4, 9, -2, // batch = 0 + -3, -2, -4, 9, -8, 1, // batch = 1 + }, + { + -3, -7, 4, -4, -6, 4, // unit = 0 + 3, 5, 2, 3, -3, -8, // unit = 1 + -3, 7, 4, 9, 0, -5, // unit = 2 + }, + {-1, -5, -8}, { + 0, 0, 32, // batch = 0 + 22, 11, 47, // batch = 1 + }); +} + +TEST(FullyConnectedTest, InvalidBiasType_NEG) { Shape input_shape{3, 2, 2, 1}; std::vector<float> input_data{ @@ -40,6 +118,34 @@ TEST(FullyConnectedTest, Float) -3, 7, 4, 9, 0, -5, // unit = 2 }; Shape bias_shape{3}; + std::vector<int32_t> bias_data{-1, -5, -8}; + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor weights_tensor = makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data); + Tensor bias_tensor = makeInputTensor<DataType::S32>(bias_shape, bias_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + FullyConnectedParams params{}; + params.activation = Activation::RELU; + + FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(FullyConnectedTest, InvalidWeightShapeDim_NEG) +{ + Shape input_shape{3, 2, 2, 1}; + std::vector<float> input_data{ + -3, -5, 5, 4, 9, -2, // batch = 0 + -3, -2, -4, 9, -8, 1, // batch = 1 + }; + Shape weights_shape{1, 3, 6}; + std::vector<float> weights_data{ + -3, -7, 4, -4, -6, 4, // unit = 0 + 3, 5, 2, 3, -3, -8, // unit = 1 + -3, 7, 4, 9, 0, -5, // unit = 2 + }; + Shape bias_shape{3}; std::vector<float> bias_data{-1, -5, -8}; Tensor 
input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); @@ -51,15 +157,38 @@ TEST(FullyConnectedTest, Float) params.activation = Activation::RELU; FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params); - kernel.configure(); - kernel.execute(); + EXPECT_ANY_THROW(kernel.configure()); +} - std::vector<float> ref_output_data{ - 0, 0, 32, // batch = 0 - 22, 11, 47, // batch = 1 +TEST(FullyConnectedTest, BiasElementNumWeightDimMismatch_NEG) +{ + Shape input_shape{3, 2, 2, 1}; + std::vector<float> input_data{ + -3, -5, 5, 4, 9, -2, // batch = 0 + -3, -2, -4, 9, -8, 1, // batch = 1 }; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + Shape weights_shape{6, 3}; + std::vector<float> weights_data{ + -3, -7, 4, // unit = 0 + -4, -6, 4, // unit = 1 + 3, 5, 2, // unit = 2 + 3, -3, -8, // unit = 3 + -3, 7, 4, // unit = 4 + 9, 0, -5, // unit = 5 + }; + Shape bias_shape{3}; + std::vector<float> bias_data{-1, -5, -8}; + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor weights_tensor = makeInputTensor<DataType::FLOAT32>(weights_shape, weights_data); + Tensor bias_tensor = makeInputTensor<DataType::FLOAT32>(bias_shape, bias_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + FullyConnectedParams params{}; + params.activation = Activation::RELU; + + FullyConnected kernel(&input_tensor, &weights_tensor, &bias_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); } } // namespace diff --git a/compiler/luci-interpreter/src/kernels/Greater.cpp b/compiler/luci-interpreter/src/kernels/Greater.cpp new file mode 100644 index 000000000..f0dd2db36 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Greater.cpp @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Greater.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/comparisons.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Greater::Greater(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {} + +void Greater::configure() +{ + LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL); + + if (x()->element_type() == DataType::U8) + { + quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift); + quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift); + } + output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); +} + +void Greater::execute() const +{ + switch (x()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Greater::evalFloat() const +{ + const auto x_data = getTensorData<float>(x()); + const auto y_data = getTensorData<float>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowGreater(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::Greater(op_params, getTensorShape(x()), x_data, getTensorShape(y()), + y_data, getTensorShape(output()), output_data); + } +} + +void Greater::evalQuantized() const +{ + const auto x_data = getTensorData<uint8_t>(x()); + const auto y_data = getTensorData<uint8_t>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.left_shift = 8; + op_params.input1_offset = -x()->zero_point(); // Note the '-' + op_params.input1_shift = _x_shift; + op_params.input1_multiplier = _x_multiplier; + op_params.input2_offset = -y()->zero_point(); // Note the '-' + op_params.input2_shift = _y_shift; + op_params.input2_multiplier = _y_multiplier; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowGreaterWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::GreaterWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Greater.h b/compiler/luci-interpreter/src/kernels/Greater.h new file mode 100644 index 000000000..a65d29f5c --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Greater.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_GREATER_H +#define LUCI_INTERPRETER_KERNELS_GREATER_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Greater : public Kernel +{ +public: + Greater(const Tensor *x, const Tensor *y, Tensor *output); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + +private: + int32_t _x_multiplier = 0; + int32_t _x_shift = 0; + int32_t _y_multiplier = 0; + int32_t _y_shift = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_GREATER_H diff --git a/compiler/luci-interpreter/src/kernels/Greater.test.cpp b/compiler/luci-interpreter/src/kernels/Greater.test.cpp new file mode 100644 index 000000000..3122fa840 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Greater.test.cpp @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Greater.h" +#include "kernels/TestUtils.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +TEST(GreaterTest, FloatSimple) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + -1, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + false, false, true, // Row 1 + true, false, false, // Row 2 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST(GreaterTest, FloatBroardcast) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + -1, 0, 1, // Row 3 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, false, true, // Row 1 + true, false, false, // Row 2 + false, false, true, // Row 3 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3})); +} + +// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
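+// A note on the Uint8QuantizedRescale test below (a reading of Greater::evalQuantized
+// above, offered as a sketch rather than a spec): when x and y carry different
+// quantization parameters, left_shift = 8 together with the per-input multiplier/shift
+// pairs rescales both operands onto a common integer grid before comparing, so the
+// expected boolean outputs still match the float reference values.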
+const float F_MIN = -128.0 / 128.0; +const float F_MAX = 127.0 / 128.0; + +TEST(GreaterTest, Uint8Quantized) +{ + std::vector<float> x_data{ + 0.5, 0.6, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.6, 0.6, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + false, false, true, true, // Row 1 + true, false, true, false, // Row 2 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = + makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data); + Tensor y_tensor = + makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST(GreaterTest, Uint8QuantizedRescale) +{ + std::vector<float> x_data{ + 0.5, 0.6, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.6, 0.6, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + false, false, true, true, // Row 1 + true, false, true, false, // Row 2 + }; + + std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 3); + + Tensor x_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, + x_quant_param.second, x_data); + Tensor y_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, + y_quant_param.second, y_data); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST(GreaterTest, Uint8QuantizedBroadcast) +{ + std::vector<float> x_data{ + 0.4, -0.8, 0.7, 0.3, // Row 1 + -0.5, 0.1, 0, 0.5, // Row 2 + 1, 0, 0.05, -1, // Row 3 + }; + + std::vector<float> y_data{ + -1, 0.05, 0, 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, false, true, false, // Row 1 + true, true, false, false, // Row 2 + true, false, true, false, // Row 3 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = + makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data); + Tensor y_tensor = + makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST(GreaterTest, Input_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Greater kernel(&x_tensor, 
&y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(GreaterTest, Input_Output_Type_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Greater kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/GreaterEqual.cpp b/compiler/luci-interpreter/src/kernels/GreaterEqual.cpp new file mode 100644 index 000000000..68135e27c --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/GreaterEqual.cpp @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/GreaterEqual.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/comparisons.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +GreaterEqual::GreaterEqual(const Tensor *x, const Tensor *y, Tensor *output) + : Kernel({x, y}, {output}) +{ +} + +void GreaterEqual::configure() +{ + LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL); + + if (x()->element_type() == DataType::U8) + { + quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift); + quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift); + } + output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); +} + +void GreaterEqual::execute() const +{ + switch (x()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void GreaterEqual::evalFloat() const +{ + const auto x_data = getTensorData<float>(x()); + const auto y_data = getTensorData<float>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowGreaterEqual(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::GreaterEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()), + y_data, getTensorShape(output()), output_data); + } +} + +void GreaterEqual::evalQuantized() const +{ + const auto x_data = getTensorData<uint8_t>(x()); + const auto y_data = getTensorData<uint8_t>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.left_shift = 8; + op_params.input1_offset = -x()->zero_point(); // Note the '-' + op_params.input1_shift = _x_shift; + op_params.input1_multiplier = _x_multiplier; + op_params.input2_offset 
= -y()->zero_point(); // Note the '-' + op_params.input2_shift = _y_shift; + op_params.input2_multiplier = _y_multiplier; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowGreaterEqualWithScaling( + op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::GreaterEqualWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/GreaterEqual.h b/compiler/luci-interpreter/src/kernels/GreaterEqual.h new file mode 100644 index 000000000..e948d698f --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/GreaterEqual.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_GREATER_EQUAL_H +#define LUCI_INTERPRETER_KERNELS_GREATER_EQUAL_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class GreaterEqual : public Kernel +{ +public: + GreaterEqual(const Tensor *x, const Tensor *y, Tensor *output); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + +private: + int32_t _x_multiplier = 0; + int32_t _x_shift = 0; + int32_t _y_multiplier = 0; + int32_t _y_shift = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_GREATER_EQUAL_H diff --git a/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp b/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp new file mode 100644 index 000000000..11e62644c --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/GreaterEqual.test.cpp @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/GreaterEqual.h" +#include "kernels/TestUtils.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +TEST(GreaterEqualTest, FloatSimple) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + -1, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + false, true, true, // Row 1 + true, true, false, // Row 2 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST(GreaterEqualTest, FloatBroardcast) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + -1, 0, 1, // Row 3 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, true, true, // Row 1 + true, false, false, // Row 2 + false, false, true, // Row 3 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3})); +} + +// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+const float F_MIN = -128.0 / 128.0; +const float F_MAX = 127.0 / 128.0; + +TEST(GreaterEqualTest, Uint8Quantized) +{ + std::vector<float> x_data{ + 0.5, 0.6, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.6, 0.55, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + false, true, true, true, // Row 1 + true, false, true, false, // Row 2 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = + makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data); + Tensor y_tensor = + makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST(GreaterEqualTest, Uint8QuantizedRescale) +{ + std::vector<float> x_data{ + 0.5, 0.5, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.5, 0.6, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + false, true, true, true, // Row 1 + true, false, true, false, // Row 2 + }; + + std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5); + + Tensor x_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, + x_quant_param.second, x_data); + Tensor y_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, + y_quant_param.second, y_data); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST(GreaterEqualTest, Uint8QuantizedBroadcast) +{ + std::vector<float> x_data{ + 0.4, -0.8, 0.7, 0.3, // Row 1 + -0.5, 0.1, 0, 0.5, // Row 2 + 1, 0, 0.05, -1, // Row 3 + }; + + std::vector<float> y_data{ + -1, 0.05, 0, 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, false, true, false, // Row 1 + true, true, true, false, // Row 2 + true, false, true, false, // Row 3 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = + makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data); + Tensor y_tensor = + makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST(GreaterEqualTest, Input_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}); + Tensor output_tensor = 
makeOutputTensor(DataType::BOOL); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(GreaterEqualTest, Input_Output_Type_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + GreaterEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/If.cpp b/compiler/luci-interpreter/src/kernels/If.cpp index e6bdee338..ca982d591 100644 --- a/compiler/luci-interpreter/src/kernels/If.cpp +++ b/compiler/luci-interpreter/src/kernels/If.cpp @@ -15,6 +15,7 @@ */ #include "kernels/If.h" +#include "kernels/Utils.h" #include <cstring> @@ -40,14 +41,14 @@ If::If(const Tensor *cond, const std::vector<const Tensor *> &inputs, std::vecto void If::configure() { - assert(cond()->element_type() == DataType::BOOL); - assert(cond()->shape().num_elements() == 1); + LUCI_INTERPRETER_CHECK(cond()->element_type() == DataType::BOOL); + LUCI_INTERPRETER_CHECK(cond()->shape().num_elements() == 1); for (RuntimeGraph *graph : {_then_graph, _else_graph}) { (void)graph; - assert(graph->getInputTensors().size() == getInputTensors().size() - 1); - assert(graph->getOutputTensors().size() == getOutputTensors().size()); + LUCI_INTERPRETER_CHECK(graph->getInputTensors().size() == getInputTensors().size() - 1); + LUCI_INTERPRETER_CHECK(graph->getOutputTensors().size() == getOutputTensors().size()); } } @@ -62,7 +63,7 @@ void If::execute() const // Copy kernel inputs to active graph inputs. for (size_t i = 0; i < getInputTensors().size() - 1; ++i) { - assert(graph_inputs[i]->element_type() == input(i)->element_type()); + LUCI_INTERPRETER_CHECK(graph_inputs[i]->element_type() == input(i)->element_type()); graph_inputs[i]->resize(input(i)->shape()); const int32_t num_elements = input(i)->shape().num_elements(); @@ -75,7 +76,7 @@ void If::execute() const // Copy graph outputs to kernel outputs. 
for (size_t i = 0; i < getOutputTensors().size(); ++i) { - assert(graph_outputs[i]->element_type() == output(i)->element_type()); + LUCI_INTERPRETER_CHECK(graph_outputs[i]->element_type() == output(i)->element_type()); output(i)->resize(graph_outputs[i]->shape()); const int32_t num_elements = output(i)->shape().num_elements(); diff --git a/compiler/luci-interpreter/src/kernels/If.test.cpp b/compiler/luci-interpreter/src/kernels/If.test.cpp index 9b3857ce3..6967407fb 100644 --- a/compiler/luci-interpreter/src/kernels/If.test.cpp +++ b/compiler/luci-interpreter/src/kernels/If.test.cpp @@ -85,7 +85,7 @@ TEST(IfTest, CondTrue) kernel.configure(); kernel.execute(); - EXPECT_THAT(extractTensorData<float>(output), ElementsAreArray(ArrayFloatNear({6, 9}))); + EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({6, 9})); } TEST(IfTest, CondFalse) @@ -103,7 +103,37 @@ TEST(IfTest, CondFalse) kernel.configure(); kernel.execute(); - EXPECT_THAT(extractTensorData<float>(output), ElementsAreArray(ArrayFloatNear({5, 14}))); + EXPECT_THAT(extractTensorData<float>(output), FloatArrayNear({5, 14})); +} + +TEST(IfTest, InvalidCondType_NEG) +{ + Tensor cond = makeInputTensor<DataType::FLOAT32>({1}, {1}); + Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}); + Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}); + Tensor output = makeOutputTensor(DataType::FLOAT32); + + RuntimeModule module(nullptr); + RuntimeGraph *then_graph = buildAddSubgraph(&module); + RuntimeGraph *else_graph = buildMulSubgraph(&module); + + If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(IfTest, InvalidCondElementNum_NEG) +{ + Tensor cond = makeInputTensor<DataType::BOOL>({2}, {false, true}); + Tensor input1 = makeInputTensor<DataType::FLOAT32>({2}, {5, 7}); + Tensor input2 = makeInputTensor<DataType::FLOAT32>({1, 2}, {1, 2}); + Tensor output = makeOutputTensor(DataType::FLOAT32); + + RuntimeModule module(nullptr); + RuntimeGraph *then_graph = buildAddSubgraph(&module); + RuntimeGraph *else_graph = buildMulSubgraph(&module); + + If kernel(&cond, {&input1, &input2}, {&output}, then_graph, else_graph); + EXPECT_ANY_THROW(kernel.configure()); } } // namespace diff --git a/compiler/luci-interpreter/src/kernels/L2Normalize.cpp b/compiler/luci-interpreter/src/kernels/L2Normalize.cpp index cfa535075..0bf133d9c 100644 --- a/compiler/luci-interpreter/src/kernels/L2Normalize.cpp +++ b/compiler/luci-interpreter/src/kernels/L2Normalize.cpp @@ -34,15 +34,16 @@ L2Normalize::L2Normalize(const Tensor *input, Tensor *output, const L2NormParams void L2Normalize::configure() { - assert(input()->shape().num_dims() <= 4); - assert(output()->element_type() == DataType::FLOAT32 || output()->element_type() == DataType::U8); - assert(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() <= 4); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32 || + output()->element_type() == DataType::U8); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); if (output()->element_type() == DataType::U8) { - assert(output()->scale() == (1. / 128.)); - assert(output()->zero_point() == 128); + LUCI_INTERPRETER_CHECK(output()->scale() == (1. 
/ 128.)); + LUCI_INTERPRETER_CHECK(output()->zero_point() == 128); } - assert(params().activation == Activation::NONE); + LUCI_INTERPRETER_CHECK(params().activation == Activation::NONE); output()->resize(input()->shape()); } diff --git a/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp b/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp index f53eaca94..8f9431182 100644 --- a/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp +++ b/compiler/luci-interpreter/src/kernels/L2Normalize.test.cpp @@ -26,11 +26,11 @@ namespace using namespace testing; -TEST(L2NormalizeTest, Float) +template <typename T> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, std::initializer_list<float> output_data) { - std::vector<float> input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}; - - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, input_data); + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); L2NormParams params{}; @@ -40,14 +40,76 @@ TEST(L2NormalizeTest, Float) kernel.configure(); kernel.execute(); - std::vector<float> ref_output_data{-0.55, 0.3, 0.35, 0.6, -0.35, 0.05}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +template <> +void Check<uint8_t>(std::initializer_list<int32_t> input_shape, + std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, + std::initializer_list<float> output_data) +{ + std::pair<float, int32_t> quant_param = + quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f, + std::max(input_data) > 0 ? std::max(input_data) : 0.f); + + Tensor input_tensor = + makeInputTensor<DataType::U8>(input_shape, quant_param.first, quant_param.second, input_data); + Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 128., 128); + + L2NormParams params{}; + params.activation = Activation::NONE; + + L2Normalize kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, output_tensor.scale())); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +template <typename T> class L2NormalizeTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t>; +TYPED_TEST_CASE(L2NormalizeTest, DataTypes); + +TYPED_TEST(L2NormalizeTest, Simple) +{ + Check<TypeParam>({1, 1, 1, 6}, {1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}, + {-0.55, 0.3, 0.35, 0.6, -0.35, 0.05}); } -// TODO Uint8Quantized -// Implement GetDequantizedOutput Function. 
-// Create Test for Uint8 Case +TEST(L2NormalizeTest, ActivationType_NEG) +{ + std::vector<float> input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}; + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, input_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + L2NormParams params{}; + params.activation = Activation::RELU6; + + L2Normalize kernel(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(L2NormalizeTest, InvalidOutputQuantParam_NEG) +{ + std::vector<float> input_data = {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}; + + Tensor input_tensor = makeInputTensor<DataType::U8>({1, 1, 1, 6}, 1. / 64., 127, input_data); + Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 64., 127); + + L2NormParams params{}; + params.activation = Activation::NONE; + + L2Normalize kernel(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} } // namespace } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/L2Pool2D.cpp b/compiler/luci-interpreter/src/kernels/L2Pool2D.cpp index 37a6ddedc..979364a7f 100644 --- a/compiler/luci-interpreter/src/kernels/L2Pool2D.cpp +++ b/compiler/luci-interpreter/src/kernels/L2Pool2D.cpp @@ -36,8 +36,8 @@ L2Pool2D::L2Pool2D(const Tensor *input, Tensor *output, const Pool2DParams ¶ void L2Pool2D::configure() { - assert(input()->shape().num_dims() == 4); - assert(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); int batches = input()->shape().dim(0); int height = input()->shape().dim(1); @@ -55,7 +55,7 @@ void L2Pool2D::configure() _padding_height = computePadding(params().stride_height, 1, height, params().filter_height, out_height); - assert(input()->element_type() == DataType::FLOAT32); + LUCI_INTERPRETER_CHECK(input()->element_type() == DataType::FLOAT32); output()->resize({batches, out_height, out_width, channels_out}); } diff --git a/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp b/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp index 06bb9388f..5f834e3c1 100644 --- a/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp +++ b/compiler/luci-interpreter/src/kernels/L2Pool2D.test.cpp @@ -50,8 +50,7 @@ TEST(L2Pool2DTest, FloatNone) kernel.execute(); std::vector<float> ref_output_data{3.5, 6.5}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); // TODO make a Shape checking of output_tensor. } @@ -78,8 +77,7 @@ TEST(L2Pool2DTest, FloatRelu) kernel.execute(); std::vector<float> ref_output_data{3.53553, 6.5}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); // TODO make a Shape checking of output_tensor. } @@ -106,8 +104,7 @@ TEST(L2Pool2DTest, FloatRelu1) kernel.execute(); std::vector<float> ref_output_data{0.353553, 1.0}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); // TODO make a Shape checking of output_tensor. 
} @@ -134,8 +131,7 @@ TEST(L2Pool2DTest, FloatRelu6) kernel.execute(); std::vector<float> ref_output_data{0.353553, 6.0}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); // TODO make a Shape checking of output_tensor. } @@ -162,12 +158,11 @@ TEST(L2Pool2DTest, FloatPaddingSame) kernel.execute(); std::vector<float> ref_output_data{3.5, 6.5}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); // TODO make a Shape checking of output_tensor. } -TEST(L2Pool2DTest, FloatPaddingSameSlide1) +TEST(L2Pool2DTest, FloatPaddingSameStride) { Shape input_shape{1, 2, 4, 1}; std::vector<float> input_data{ @@ -190,12 +185,11 @@ TEST(L2Pool2DTest, FloatPaddingSameSlide1) kernel.execute(); std::vector<float> ref_output_data{3.5, 6.0, 6.5, 5.70088, 2.54951, 7.2111, 8.63134, 7.0}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); // TODO make a Shape checking of output_tensor. } -TEST(L2Pool2DTest, FloatPaddingValidSlide1) +TEST(L2Pool2DTest, FloatPaddingValidStride) { Shape input_shape{1, 2, 4, 1}; std::vector<float> input_data{ @@ -218,11 +212,54 @@ TEST(L2Pool2DTest, FloatPaddingValidSlide1) kernel.execute(); std::vector<float> ref_output_data{3.5, 6.0, 6.5}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); // TODO make a Shape checking of output_tensor. 
} +TEST(L2Pool2DTest, InvalidInputShape_NEG) +{ + Shape input_shape{1, 2, 4}; + std::vector<float> input_data{ + 0, 6, 2, 4, // + 3, 2, 10, 7, // + }; + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.activation = Activation::NONE; + params.filter_height = 2; + params.filter_width = 2; + params.stride_height = 1; + params.stride_width = 1; + + L2Pool2D kernel(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(L2Pool2DTest, InvalidInputOutputType_NEG) +{ + Shape input_shape{1, 2, 4}; + std::vector<float> input_data{ + 0, 6, 2, 4, // + 3, 2, 10, 7, // + }; + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.activation = Activation::NONE; + params.filter_height = 2; + params.filter_width = 2; + params.stride_height = 1; + params.stride_width = 1; + + L2Pool2D kernel(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp b/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp index 1a26debe0..919b12792 100644 --- a/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp +++ b/compiler/luci-interpreter/src/kernels/LeakyRelu.cpp @@ -36,7 +36,7 @@ LeakyRelu::LeakyRelu(const Tensor *input, Tensor *output, const LeakyReluParams void LeakyRelu::configure() { - assert(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); if (input()->element_type() == DataType::U8) { double alpha_multiplier = input()->scale() * params().alpha / output()->scale(); diff --git a/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp b/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp index c79d3d6bc..2778549ed 100644 --- a/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp +++ b/compiler/luci-interpreter/src/kernels/LeakyRelu.test.cpp @@ -28,12 +28,11 @@ using namespace testing; template <typename T> void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, - std::initializer_list<T> input_data, std::initializer_list<T> output_data, float alpha, - DataType element_type) + std::initializer_list<float> input_data, std::initializer_list<float> output_data, + float alpha) { - Tensor input_tensor{element_type, input_shape, {}, ""}; - input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(T)); - + constexpr DataType element_type = getElementType<T>(); + Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data); Tensor output_tensor = makeOutputTensor(element_type); LeakyReluParams params{}; @@ -44,30 +43,75 @@ void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int kernel.configure(); kernel.execute(); - (void)output_shape; + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); } -TEST(LeakReluTest, FloatSimple) +template <> +void Check<uint8_t>(std::initializer_list<int32_t> input_shape, + std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, + 
std::initializer_list<float> output_data, float alpha) { - Check<float>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3}, /*input_data=*/ - { - 0.0f, 1.0f, 3.0f, // Row 1 - 1.0f, -1.0f, -2.0f, // Row 2 - }, - /*output_data=*/ - { - 0.0f, 1.0f, 3.0f, // Row 1 - 1.0f, -0.5f, -1.0f, // Row 2 - }, - /*alpha=*/0.5f, getElementType<float>()); + const float quantized_tolerance = getTolerance(-8, 127.f / 16.f, 255); + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-8, 127.f / 16.f); + Tensor input_tensor = + makeInputTensor<DataType::U8>(input_shape, quant_param.first, quant_param.second, input_data); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + LeakyReluParams params{}; + params.alpha = alpha; + + LeakyRelu kernel(&input_tensor, &output_tensor, params); + + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, quantized_tolerance)); +} + +template <typename T> class LeakReluTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t>; +TYPED_TEST_CASE(LeakReluTest, DataTypes); + +TYPED_TEST(LeakReluTest, Simple) +{ + Check<TypeParam>(/*input_shape=*/{2, 3}, /*output_shape=*/{2, 3}, + /*input_data=*/ + { + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -1.0f, -2.0f, // Row 2 + }, + /*output_data=*/ + { + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -0.5f, -1.0f, // Row 2 + }, + /*alpha=*/0.5f); SUCCEED(); } -// TODO Uint8Simple -// Implement GetDequantizedOutput Function. -// Create Test for Uint8 Case +TEST(LeakReluTest, IvalidInputOutputType_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, { + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -1.0f, -2.0f, // Row 2 + }); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + LeakyReluParams params{}; + params.alpha = 0.5f; + + LeakyRelu kernel(&input_tensor, &output_tensor, params); + + EXPECT_ANY_THROW(kernel.configure()); +} } // namespace } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/Less.cpp b/compiler/luci-interpreter/src/kernels/Less.cpp new file mode 100644 index 000000000..041444926 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Less.cpp @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Less.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/comparisons.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Less::Less(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {} + +void Less::configure() +{ + LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL); + + if (x()->element_type() == DataType::U8) + { + quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift); + quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift); + } + output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); +} + +void Less::execute() const +{ + switch (x()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Less::evalFloat() const +{ + const auto x_data = getTensorData<float>(x()); + const auto y_data = getTensorData<float>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowLess(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::Less(op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } +} + +void Less::evalQuantized() const +{ + const auto x_data = getTensorData<uint8_t>(x()); + const auto y_data = getTensorData<uint8_t>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.left_shift = 8; + op_params.input1_offset = -x()->zero_point(); // Note the '-' + op_params.input1_shift = _x_shift; + op_params.input1_multiplier = _x_multiplier; + op_params.input2_offset = -y()->zero_point(); // Note the '-' + op_params.input2_shift = _y_shift; + op_params.input2_multiplier = _y_multiplier; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowLessWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::LessWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, getTensorShape(output()), + output_data); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Less.h b/compiler/luci-interpreter/src/kernels/Less.h new file mode 100644 index 000000000..fe03e10b1 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Less.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_LESS_H +#define LUCI_INTERPRETER_KERNELS_LESS_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Less : public Kernel +{ +public: + Less(const Tensor *x, const Tensor *y, Tensor *output); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + +private: + int32_t _x_multiplier = 0; + int32_t _x_shift = 0; + int32_t _y_multiplier = 0; + int32_t _y_shift = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_LESS_H diff --git a/compiler/luci-interpreter/src/kernels/Less.test.cpp b/compiler/luci-interpreter/src/kernels/Less.test.cpp new file mode 100644 index 000000000..73aa30b36 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Less.test.cpp @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Less.h" +#include "kernels/TestUtils.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +TEST(LessTest, FloatSimple) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + -1, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, false, false, // Row 1 + false, false, true, // Row 2 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST(LessTest, FloatBroardcast) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + -1, 0, 1, // Row 3 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, false, false, // Row 1 + false, true, true, // Row 2 + true, true, false, // Row 3 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3})); +} + +// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+const float F_MIN = -128.0 / 128.0; +const float F_MAX = 127.0 / 128.0; + +TEST(LessTest, Uint8Quantized) +{ + std::vector<float> x_data{ + 0.5, 0.6, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.6, 0.55, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, false, false, false, // Row 1 + false, true, false, true, // Row 2 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = + makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data); + Tensor y_tensor = + makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST(LessTest, Uint8QuantizedRescale) +{ + std::vector<float> x_data{ + 0.5, 0.6, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.6, 0.6, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, false, false, false, // Row 1 + false, true, false, true, // Row 2 + }; + + std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5); + + Tensor x_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, + x_quant_param.second, x_data); + Tensor y_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, + y_quant_param.second, y_data); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST(LessTest, Uint8QuantizedBroadcast) +{ + std::vector<float> x_data{ + 0.4, -0.8, 0.7, 0.3, // Row 1 + -0.5, 0.1, 0, 0.5, // Row 2 + 1, 0, 0.05, -1, // Row 3 + }; + + std::vector<float> y_data{ + -1, 0.05, 0, 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, true, false, true, // Row 1 + false, false, false, true, // Row 2 + false, true, false, true, // Row 3 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = + makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data); + Tensor y_tensor = + makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST(LessTest, Input_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + Less kernel(&x_tensor, &y_tensor, 
&output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(LessTest, Input_Output_Type_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Less kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/LessEqual.cpp b/compiler/luci-interpreter/src/kernels/LessEqual.cpp new file mode 100644 index 000000000..b8aaba178 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/LessEqual.cpp @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/LessEqual.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/comparisons.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +LessEqual::LessEqual(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {} + +void LessEqual::configure() +{ + LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL); + + if (x()->element_type() == DataType::U8) + { + quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift); + quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift); + } + output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); +} + +void LessEqual::execute() const +{ + switch (x()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void LessEqual::evalFloat() const +{ + const auto x_data = getTensorData<float>(x()); + const auto y_data = getTensorData<float>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowLessEqual(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::LessEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()), + y_data, getTensorShape(output()), output_data); + } +} + +void LessEqual::evalQuantized() const +{ + const auto x_data = getTensorData<uint8_t>(x()); + const auto y_data = getTensorData<uint8_t>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.left_shift = 8; + op_params.input1_offset = -x()->zero_point(); // Note the '-' + op_params.input1_shift = _x_shift; + op_params.input1_multiplier = _x_multiplier; + op_params.input2_offset = -y()->zero_point(); // Note the '-' + 
op_params.input2_shift = _y_shift; + op_params.input2_multiplier = _y_multiplier; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowLessEqualWithScaling( + op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::LessEqualWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/LessEqual.h b/compiler/luci-interpreter/src/kernels/LessEqual.h new file mode 100644 index 000000000..ed4b0f1ea --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/LessEqual.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_LESS_EQUAL_H +#define LUCI_INTERPRETER_KERNELS_LESS_EQUAL_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class LessEqual : public Kernel +{ +public: + LessEqual(const Tensor *x, const Tensor *y, Tensor *output); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + +private: + int32_t _x_multiplier = 0; + int32_t _x_shift = 0; + int32_t _y_multiplier = 0; + int32_t _y_shift = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_LESS_EQUAL_H diff --git a/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp b/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp new file mode 100644 index 000000000..9184c061f --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/LessEqual.test.cpp @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/LessEqual.h" +#include "kernels/TestUtils.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +TEST(LessEqualTest, FloatSimple) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + -1, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, true, false, // Row 1 + false, true, true, // Row 2 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST(LessEqualTest, FloatBroardcast) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + -1, 0, 1, // Row 3 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, true, false, // Row 1 + false, true, true, // Row 2 + true, true, false, // Row 3 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({3, 3}, x_data); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({3, 3})); +} + +// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+const float F_MIN = -128.0 / 128.0; +const float F_MAX = 127.0 / 128.0; + +TEST(LessEqualTest, Uint8Quantized) +{ + std::vector<float> x_data{ + 0.5, 0.6, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.6, 0.55, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, true, false, false, // Row 1 + false, true, false, true, // Row 2 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = + makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, x_data); + Tensor y_tensor = + makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, quant_param.second, y_data); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST(LessEqualTest, Uint8QuantizedRescale) +{ + std::vector<float> x_data{ + 0.5, 0.6, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.6, 0.6, 0.5, // Row 1 + -1, 0.05, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, true, false, false, // Row 1 + false, true, false, true, // Row 2 + }; + + std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 1.2, F_MAX * 1.5); + + Tensor x_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, + x_quant_param.second, x_data); + Tensor y_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, + y_quant_param.second, y_data); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST(LessEqualTest, Uint8QuantizedBroadcast) +{ + std::vector<float> x_data{ + 0.4, -0.8, 0.7, 0.3, // Row 1 + -0.5, 0.1, 0, 0.5, // Row 2 + 1, 0, 0.05, -1, // Row 3 + }; + + std::vector<float> y_data{ + -1, 0.05, 0, 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + false, true, false, true, // Row 1 + false, false, true, true, // Row 2 + false, true, false, true, // Row 3 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = + makeInputTensor<DataType::U8>({1, 3, 4, 1}, quant_param.first, quant_param.second, x_data); + Tensor y_tensor = + makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 3, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST(LessEqualTest, Input_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + LessEqual 
kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(LessEqualTest, Input_Output_Type_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + LessEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp b/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp index 08efa1d6a..b78e27128 100644 --- a/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp +++ b/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.cpp @@ -36,9 +36,9 @@ LocalResponseNormalization::LocalResponseNormalization( void LocalResponseNormalization::configure() { - assert(input()->shape().num_dims() == 4); - assert(output()->element_type() == DataType::FLOAT32); - assert(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::FLOAT32); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); output()->resize(input()->shape()); } diff --git a/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp b/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp index 4191bdb29..d98305c1a 100644 --- a/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp +++ b/compiler/luci-interpreter/src/kernels/LocalResponseNormalization.test.cpp @@ -44,7 +44,7 @@ TEST(LocalResponseNormalizationTest, SameAsL2Norm) kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear({-0.55, 0.3, 0.35, 0.6, -0.35, 0.05}))); + FloatArrayNear({-0.55, 0.3, 0.35, 0.6, -0.35, 0.05})); } TEST(LocalResponseNormalizationTest, WithAlpha) @@ -64,7 +64,7 @@ TEST(LocalResponseNormalizationTest, WithAlpha) kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear({-0.275, 0.15, 0.175, 0.3, -0.175, 0.025}))); + FloatArrayNear({-0.275, 0.15, 0.175, 0.3, -0.175, 0.025})); } TEST(LocalResponseNormalizationTest, WithBias) @@ -84,7 +84,7 @@ TEST(LocalResponseNormalizationTest, WithBias) kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear({-0.22, 0.12, 0.14, 0.24, -0.14, 0.02}))); + FloatArrayNear({-0.22, 0.12, 0.14, 0.24, -0.14, 0.02})); } TEST(LocalResponseNormalizationTest, SmallRadius) @@ -104,8 +104,39 @@ TEST(LocalResponseNormalizationTest, SmallRadius) kernel.execute(); EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray( - ArrayFloatNear({-0.264926, 0.125109, 0.140112, 0.267261, -0.161788, 0.0244266}))); + FloatArrayNear({-0.264926, 0.125109, 0.140112, 0.267261, -0.161788, 0.0244266})); +} + +TEST(LocalResponseNormalizationTest, InvalidInputDimension_NEG) +{ + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + LocalResponseNormalizationParams params{}; + params.radius = 20; + params.bias = 0.0; + params.alpha = 1.0; + params.beta = 0.5; + + LocalResponseNormalization kernel(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + 
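These *_NEG tests rely on configure() throwing rather than aborting: throughout this change, assert calls are replaced with LUCI_INTERPRETER_CHECK (pulled in via kernels/Utils.h), which is assumed to behave roughly like the sketch below; the macro name suffix and message here are illustrative only:

#include <stdexcept>
#include <string>

// Sketch (assumption): a throwing check, so invalid kernel configurations can be
// exercised with EXPECT_ANY_THROW instead of crashing the test binary.
#define LUCI_INTERPRETER_CHECK_SKETCH(cond)                            \
  do                                                                   \
  {                                                                    \
    if (!(cond))                                                       \
      throw std::runtime_error(std::string("Check failed: ") + #cond); \
  } while (false)
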
+TEST(LocalResponseNormalizationTest, InvalidInputOutputType_NEG) +{ + Tensor input_tensor = + makeInputTensor<DataType::FLOAT32>({1, 1, 1, 6}, {-1.1, 0.6, 0.7, 1.2, -0.7, 0.1}); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + LocalResponseNormalizationParams params{}; + params.radius = 20; + params.bias = 0.0; + params.alpha = 1.0; + params.beta = 0.5; + + LocalResponseNormalization kernel(&input_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); } } // namespace diff --git a/compiler/luci-interpreter/src/kernels/LogSoftmax.cpp b/compiler/luci-interpreter/src/kernels/LogSoftmax.cpp new file mode 100644 index 000000000..03d13e4ce --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/LogSoftmax.cpp @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/LogSoftmax.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter +{ +namespace kernels +{ + +LogSoftmax::LogSoftmax(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void LogSoftmax::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + if (input()->element_type() == DataType::U8) + { + LUCI_INTERPRETER_CHECK(output()->scale() == 16. 
/ 256);
+    LUCI_INTERPRETER_CHECK(output()->zero_point() == 255);
+
+    tflite::SoftmaxParams params{};
+
+    params.table = _table;
+    params.beta = 1.0;
+
+    tflite::optimized_ops::PopulateSoftmaxLookupTable(&params, input()->scale(), params.beta);
+  }
+  output()->resize(input()->shape());
+}
+
+void LogSoftmax::execute() const
+{
+  switch (input()->element_type())
+  {
+    case DataType::FLOAT32:
+      evalFloat();
+      break;
+    case DataType::U8:
+      evalQuantized();
+      break;
+    default:
+      throw std::runtime_error("Unsupported type.");
+  }
+}
+
+void LogSoftmax::evalFloat() const
+{
+  tflite::SoftmaxParams params{};
+  tflite::reference_ops::LogSoftmax(params, getTensorShape(input()), getTensorData<float>(input()),
+                                    getTensorShape(output()), getTensorData<float>(output()));
+}
+
+void LogSoftmax::evalQuantized() const
+{
+  const auto input_shape = getTensorShape(input());
+  const auto output_shape = getTensorShape(output());
+  const auto input_scale = input()->scale();
+  uint8_t *output_data = getTensorData<uint8_t>(output());
+  const uint8_t *input_data = getTensorData<uint8_t>(input());
+
+  tflite::SoftmaxParams params{};
+
+  params.table = const_cast<float *>(_table);
+  params.zero_point = output()->zero_point();
+  params.scale = output()->scale();
+
+  tflite::optimized_ops::LogSoftmax(params, input_scale, input_shape, input_data, output_shape,
+                                    output_data);
+}
+
+} // namespace kernels
+} // namespace luci_interpreter
diff --git a/compiler/luci-interpreter/src/kernels/LogSoftmax.h b/compiler/luci-interpreter/src/kernels/LogSoftmax.h
new file mode 100644
index 000000000..18477fbe3
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/LogSoftmax.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H
+#define LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H
+
+#include "core/Kernel.h"
+
+namespace luci_interpreter
+{
+namespace kernels
+{
+
+class LogSoftmax : public Kernel
+{
+public:
+  LogSoftmax(const Tensor *input, Tensor *output);
+
+  const Tensor *input() const { return _inputs[0]; }
+  Tensor *output() const { return _outputs[0]; }
+
+  void configure() override;
+  void execute() const override;
+
+private:
+  void evalFloat() const;
+  void evalQuantized() const;
+
+  float _table[256];
+};
+
+} // namespace kernels
+} // namespace luci_interpreter
+
+#endif // LUCI_INTERPRETER_KERNELS_LOGSOFTMAX_H
diff --git a/compiler/luci-interpreter/src/kernels/LogSoftmax.test.cpp b/compiler/luci-interpreter/src/kernels/LogSoftmax.test.cpp
new file mode 100644
index 000000000..d3b331dfe
--- /dev/null
+++ b/compiler/luci-interpreter/src/kernels/LogSoftmax.test.cpp
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/LogSoftmax.h" +#include "kernels/TestUtils.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +TEST(LogSoftmaxTest, Float) +{ + Shape input_shape{2, 4}; + std::vector<float> input_data{ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }; + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + LogSoftmax kernel(&input_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + std::vector<float> ref_output_data{ + -4.14297, -10.14297, -2.14297, -.142971, // + -7.00104, -12.00104, -.00104087, -9.00104, // + }; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(LogSoftmaxTest, Uint8) +{ + float kMin = -10; + float kMax = 10; + float kLogSoftmaxQuantizedTolerance = 16. / 256; + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(kMin, kMax); + std::vector<float> input_data{ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }; + Tensor input_tensor = + makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second, input_data); + Tensor output_tensor = makeOutputTensor(DataType::U8, 16. / 256, 255); + + LogSoftmax kernel(&input_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + std::vector<float> ref_output_data{ + -4.14297, -10.14297, -2.14297, -.142971, // + -7.00104, -12.00104, -.00104087, -9.00104, // + }; + std::vector<int32_t> ref_output_shape{2, 4}; + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kLogSoftmaxQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray({189, 93, 221, 253, 142, 63, 255, 111})); +} + +TEST(LogSoftmaxTest, InvalidInputOutputType_NEG) +{ + std::vector<float> input_data{ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }; + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 4}, input_data); + Tensor output_tensor = makeOutputTensor(DataType::U8, 16. / 256, 255); + + LogSoftmax kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(LogSoftmaxTest, InvalidOutputQuantParam_NEG) +{ + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-10, 10); + std::vector<float> input_data{ + 0, -6, 2, 4, // + 3, -2, 10, 1, // + }; + Tensor input_tensor = + makeInputTensor<DataType::U8>({2, 4}, quant_param.first, quant_param.second, input_data); + Tensor output_tensor = makeOutputTensor(DataType::U8, 20. 
/ 256, 255); + + LogSoftmax kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Logistic.cpp b/compiler/luci-interpreter/src/kernels/Logistic.cpp index c7d45615c..97d7bf13d 100644 --- a/compiler/luci-interpreter/src/kernels/Logistic.cpp +++ b/compiler/luci-interpreter/src/kernels/Logistic.cpp @@ -29,10 +29,10 @@ Logistic::Logistic(const Tensor *input, Tensor *output) : Kernel({input}, {outpu void Logistic::configure() { - assert(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); if (input()->element_type() == DataType::U8) { - assert(output()->scale() == 1. / 256); + LUCI_INTERPRETER_CHECK(output()->scale() == 1. / 256); populateLookupTable(); } output()->resize(input()->shape()); diff --git a/compiler/luci-interpreter/src/kernels/Logistic.test.cpp b/compiler/luci-interpreter/src/kernels/Logistic.test.cpp index 00feddf3d..d3bbb330d 100644 --- a/compiler/luci-interpreter/src/kernels/Logistic.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Logistic.test.cpp @@ -26,31 +26,108 @@ namespace using namespace testing; -TEST(LogisticTest, Float) +template <typename T> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, std::initializer_list<float> output_data) { - Shape input_shape{1, 2, 4, 1}; - std::vector<float> input_data{ - 0, -6, 2, 4, // - 3, -2, 10, 1, // - }; - Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); - Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + Tensor input_tensor = makeInputTensor<getElementType<T>()>(input_shape, input_data); + Tensor output_tensor = makeOutputTensor(getElementType<T>()); Logistic kernel(&input_tensor, &output_tensor); kernel.configure(); kernel.execute(); - std::vector<float> ref_output_data{ - 0.5, 0.002473, 0.880797, 0.982014, // - 0.952574, 0.119203, 0.999955, 0.731059, // - }; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); - // TODO make a Shape checking of output_tensor. + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); } -// TODO Uint8 -// Need to Implement GetDequantizedOutput Function. +template <> +void Check<uint8_t>(std::initializer_list<int32_t> input_shape, + std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, + std::initializer_list<float> output_data) +{ + std::pair<float, int32_t> input_quant_param = + quantizationParams<uint8_t>(std::min(input_data), std::max(input_data)); + Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, + input_quant_param.second, input_data); + Tensor output_tensor = makeOutputTensor(DataType::U8, 1. 
/ 256, 0); + + Logistic kernel(&input_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, output_tensor.scale() * 2)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +template <typename T> class LogisticTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t>; +TYPED_TEST_CASE(LogisticTest, DataTypes); + +TYPED_TEST(LogisticTest, Simple) +{ + Check<TypeParam>( + {89}, {89}, + {-10.0000000000, -9.7727272727, -9.5454545455, -9.3181818182, -9.0909090909, -8.8636363636, + -8.6363636364, -8.4090909091, -8.1818181818, -7.9545454545, -7.7272727273, -7.5000000000, + -7.2727272727, -7.0454545455, -6.8181818182, -6.5909090909, -6.3636363636, -6.1363636364, + -5.9090909091, -5.6818181818, -5.4545454545, -5.2272727273, -5.0000000000, -4.7727272727, + -4.5454545455, -4.3181818182, -4.0909090909, -3.8636363636, -3.6363636364, -3.4090909091, + -3.1818181818, -2.9545454545, -2.7272727273, -2.5000000000, -2.2727272727, -2.0454545455, + -1.8181818182, -1.5909090909, -1.3636363636, -1.1363636364, -0.9090909091, -0.6818181818, + -0.4545454545, -0.2272727273, 0.0000000000, 0.2272727273, 0.4545454545, 0.6818181818, + 0.9090909091, 1.1363636364, 1.3636363636, 1.5909090909, 1.8181818182, 2.0454545455, + 2.2727272727, 2.5000000000, 2.7272727273, 2.9545454545, 3.1818181818, 3.4090909091, + 3.6363636364, 3.8636363636, 4.0909090909, 4.3181818182, 4.5454545455, 4.7727272727, + 5.0000000000, 5.2272727273, 5.4545454545, 5.6818181818, 5.9090909091, 6.1363636364, + 6.3636363636, 6.5909090909, 6.8181818182, 7.0454545455, 7.2727272727, 7.5000000000, + 7.7272727273, 7.9545454545, 8.1818181818, 8.4090909091, 8.6363636364, 8.8636363636, + 9.0909090909, 9.3181818182, 9.5454545455, 9.7727272727, 10.0000000000}, + {0.0000453979, 0.0000569815, 0.0000715205, 0.0000897689, 0.0001126729, 0.0001414198, + 0.0001774998, 0.0002227827, 0.0002796147, 0.0003509396, 0.0004404502, 0.0005527786, + 0.0006937345, 0.0008706021, 0.0010925128, 0.0013709094, 0.0017201256, 0.0021581065, + 0.0027073042, 0.0033957870, 0.0042586071, 0.0053394826, 0.0066928509, 0.0083863576, + 0.0105038445, 0.0131488902, 0.0164489307, 0.0205599431, 0.0256715863, 0.0320125562, + 0.0398556989, 0.0495221198, 0.0613831074, 0.0758581800, 0.0934070047, 0.1145124805, + 0.1396521834, 0.1692560327, 0.2036499335, 0.2429886272, 0.2871859014, 0.3358556241, + 0.3882805886, 0.4434251301, 0.5000000000, 0.5565748699, 0.6117194114, 0.6641443759, + 0.7128140986, 0.7570113728, 0.7963500665, 0.8307439673, 0.8603478166, 0.8854875195, + 0.9065929953, 0.9241418200, 0.9386168926, 0.9504778802, 0.9601443011, 0.9679874438, + 0.9743284137, 0.9794400569, 0.9835510693, 0.9868511098, 0.9894961555, 0.9916136424, + 0.9933071491, 0.9946605174, 0.9957413929, 0.9966042130, 0.9972926958, 0.9978418935, + 0.9982798744, 0.9986290906, 0.9989074872, 0.9991293979, 0.9993062655, 0.9994472214, + 0.9995595498, 0.9996490604, 0.9997203853, 0.9997772173, 0.9998225002, 0.9998585802, + 0.9998873271, 0.9999102311, 0.9999284795, 0.9999430185, 0.9999546021}); +} + +TEST(LogisticTest, IvalidInputOutputType_NEG) +{ + Shape input_shape = {1}; + std::vector<float> input_data{10}; + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor output_tensor = makeOutputTensor(DataType::U8, 1. 
/ 256, 0); + + Logistic kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(LogisticTest, IvalidQuantParam_NEG) +{ + Shape input_shape = {2}; + std::vector<float> input_data{-10, 10}; + std::pair<float, int32_t> input_quant_param = quantizationParams<uint8_t>(-10, 10); + Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, + input_quant_param.second, input_data); + Tensor output_tensor = makeOutputTensor(DataType::U8, 1. / 255, 0); + + Logistic kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} } // namespace } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/MaxPool2D.cpp b/compiler/luci-interpreter/src/kernels/MaxPool2D.cpp index afecf9058..123e6e1a2 100644 --- a/compiler/luci-interpreter/src/kernels/MaxPool2D.cpp +++ b/compiler/luci-interpreter/src/kernels/MaxPool2D.cpp @@ -18,6 +18,7 @@ #include "kernels/Utils.h" +#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h> #include <tensorflow/lite/kernels/internal/reference/pooling.h> #include <stdexcept> @@ -35,7 +36,7 @@ MaxPool2D::MaxPool2D(const Tensor *input, Tensor *output, const Pool2DParams &pa void MaxPool2D::configure() { - assert(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); assert(input()->shape().num_dims() == 4); const Shape &input_shape = input()->shape(); const int32_t batches = input_shape.dim(0); @@ -54,10 +55,15 @@ void MaxPool2D::configure() computePadding(_params.stride_width, 1, input_width, _params.filter_width, output_width); output()->resize({batches, output_height, output_width, depth}); - if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S8) + if (input()->element_type() == DataType::U8) { - assert(input()->scale() == output()->scale()); - assert(input()->zero_point() == output()->zero_point()); + LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6); + LUCI_INTERPRETER_CHECK(output()->zero_point() == input()->zero_point()); + } + else if (input()->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(std::abs(output()->scale() - input()->scale()) <= 1.0e-6); + LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0); } } @@ -71,6 +77,9 @@ void MaxPool2D::execute() const case DataType::U8: evalQuantized(); break; + case DataType::S16: + evalSInt16(); + break; default: throw std::runtime_error("Unsupported type."); } @@ -116,5 +125,26 @@ void MaxPool2D::evalQuantized() const getTensorShape(output()), getTensorData<uint8_t>(output())); } +void MaxPool2D::evalSInt16() const +{ + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::PoolParams params{}; + params.padding_values.height = _padding_height; + params.padding_values.width = _padding_width; + params.stride_height = _params.stride_height; + params.stride_width = _params.stride_width; + params.filter_height = _params.filter_height; + params.filter_width = _params.filter_width; + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + tflite::reference_integer_ops::MaxPool( + params, getTensorShape(input()), getTensorData<int16_t>(input()), // + getTensorShape(output()), getTensorData<int16_t>(output())); +} + } // namespace kernels } // namespace luci_interpreter diff --git 
a/compiler/luci-interpreter/src/kernels/MaxPool2D.h b/compiler/luci-interpreter/src/kernels/MaxPool2D.h index 7a59ff022..bb7666305 100644 --- a/compiler/luci-interpreter/src/kernels/MaxPool2D.h +++ b/compiler/luci-interpreter/src/kernels/MaxPool2D.h @@ -39,6 +39,7 @@ public: private: void evalFloat() const; void evalQuantized() const; + void evalSInt16() const; private: int32_t _padding_height{}; diff --git a/compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp b/compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp index 390255d89..1d7fe06c4 100644 --- a/compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp +++ b/compiler/luci-interpreter/src/kernels/MaxPool2D.test.cpp @@ -54,8 +54,7 @@ TEST(MaxPool2DTest, Float) 5, 6, // }; std::initializer_list<int32_t> ref_output_shape{1, 2, 2, 1}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } @@ -66,11 +65,9 @@ TEST(MaxPool2DTest, Uint8) 0, -6, 12, 4, // -3, -2, 10, 7, // }; - Tensor input_tensor{DataType::U8, {1, 2, 4, 1}, {{quant_param.first}, {quant_param.second}}, ""}; + Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, + quant_param.second, input_data); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); - std::vector<uint8_t> quantize_input = - quantize<uint8_t>(input_data, quant_param.first, quant_param.second); - input_tensor.writeData(quantize_input.data(), quantize_input.size() * sizeof(uint8_t)); Pool2DParams params{}; params.padding = Padding::VALID; @@ -86,12 +83,43 @@ TEST(MaxPool2DTest, Uint8) std::vector<float> ref_output_data{0.0, 6.0}; std::initializer_list<int32_t> ref_output_shape{1, 1, 2, 1}; - EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(), - output_tensor.zero_point()), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } +TEST(MaxPool2DTest, SInt16) +{ + Shape input_shape{1, 3, 5, 1}; + std::vector<int32_t> ref_output_shape{1, 2, 2, 1}; + std::vector<float> input_data{ + 1, -1, 0, -2, 2, // + -7, -6, -5, -4, -3, // + 5, 4, 3, 6, 7, // + }; + std::vector<float> ref_output_data{ + 1, 2, // + 5, 6, // + }; + + Tensor input_tensor = makeInputTensor<DataType::S16>(input_shape, 0.2, 0, input_data); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0); + + Pool2DParams params{}; + params.padding = Padding::VALID; + params.filter_height = 2; + params.filter_width = 3; + params.stride_height = 1; + params.stride_width = 2; + params.activation = Activation::RELU6; + + MaxPool2D kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Maximum.cpp b/compiler/luci-interpreter/src/kernels/Maximum.cpp new file mode 100644 index 000000000..c522b0706 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Maximum.cpp @@ -0,0 +1,65 @@ +/* 
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Maximum.h" + +#include "kernels/Utils.h" + +#include "kernels/BinaryOpCommon.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +Maximum::Maximum(const Tensor *input1, const Tensor *input2, Tensor *output) + : Kernel({input1, input2}, {output}) +{ +} + +void Maximum::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()) + LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type()) + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void Maximum::execute() const +{ + switch (input1()->element_type()) + { + case DataType::FLOAT32: + evalMaximum<float>(); + break; + case DataType::U8: + evalMaximum<uint8_t>(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +template <typename T> inline void Maximum::evalMaximum() const +{ + BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()), + getTensorShape(input2()), getTensorData<T>(input2()), + getTensorShape(output()), getTensorData<T>(output()), + [](T x, T y) { return std::max(x, y); }); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Maximum.h b/compiler/luci-interpreter/src/kernels/Maximum.h new file mode 100644 index 000000000..3c99e69c7 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Maximum.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_MAXIMUM_H +#define LUCI_INTERPRETER_KERNELS_MAXIMUM_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Maximum : public Kernel +{ +public: + Maximum(const Tensor *input1, const Tensor *input2, Tensor *output); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template <typename T> inline void evalMaximum() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_MAXIMUM_H diff --git a/compiler/luci-interpreter/src/kernels/Maximum.test.cpp b/compiler/luci-interpreter/src/kernels/Maximum.test.cpp new file mode 100644 index 000000000..2ddaeaf04 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Maximum.test.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Maximum.h" +#include "kernels/TestUtils.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +TEST(MaximumTest, Float) +{ + Shape input_shape{3, 1, 2}; + std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; + std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43}; + Tensor input_tensor1 = makeInputTensor<DataType::FLOAT32>(input_shape, input_data1); + Tensor input_tensor2 = makeInputTensor<DataType::FLOAT32>(input_shape, input_data2); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Maximum kernel(&input_tensor1, &input_tensor2, &output_tensor); + kernel.configure(); + kernel.execute(); + + std::vector<float> ref_output_data{1.0, 0.0, 1.0, 12.0, -2.0, -1.43}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(MaximumTest, Uint8) +{ + Shape input_shape{3, 1, 2}; + std::vector<uint8_t> input_data1{1, 0, 2, 11, 2, 23}; + std::vector<uint8_t> input_data2{0, 0, 1, 12, 255, 1}; + Tensor input_tensor1 = makeInputTensor<DataType::U8>(input_shape, input_data1); + Tensor input_tensor2 = makeInputTensor<DataType::U8>(input_shape, input_data2); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Maximum kernel(&input_tensor1, &input_tensor2, &output_tensor); + kernel.configure(); + kernel.execute(); + + std::vector<int32_t> ref_output_shape{2, 4}; + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray({1, 0, 2, 12, 255, 23})); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Mean.cpp b/compiler/luci-interpreter/src/kernels/Mean.cpp index 2394e2c0e..7d022eaf8 100644 --- a/compiler/luci-interpreter/src/kernels/Mean.cpp +++ 
b/compiler/luci-interpreter/src/kernels/Mean.cpp @@ -130,8 +130,13 @@ Mean::Mean(const Tensor *input, const Tensor *axes, Tensor *output, const Reduce void Mean::configure() { - assert(input()->element_type() == output()->element_type()); - assert(axes()->element_type() == DataType::S32); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(axes()->element_type() == DataType::S32); + if (input()->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0); + } + const Shape &input_shape = input()->shape(); int input_num_dims = input_shape.num_dims(); @@ -169,6 +174,9 @@ void Mean::execute() const case DataType::U8: evalQuantized(); break; + case DataType::S16: + evalQuantizedS16(); + break; default: throw std::runtime_error("Unsupported type."); } @@ -245,5 +253,74 @@ void Mean::evalQuantized() const } } +void Mean::evalQuantizedS16() const +{ + const auto *input_data = getTensorData<int16_t>(input()); + auto *output_data = getTensorData<int16_t>(output()); + + const Shape &input_shape = input()->shape(); + const Shape &output_shape = output()->shape(); + + const auto *axes_data = getTensorData<int32_t>(axes()); + const int num_axes = axes()->shape().num_elements(); + + constexpr int32_t output_min = -std::numeric_limits<int16_t>::max(); + constexpr int32_t output_max = std::numeric_limits<int16_t>::max(); + + // Defer to specialized implementation for 4D Mean across axes 1 & 2. + if (_params.keep_dims && input_shape.num_dims() == 4 && num_axes == 2 && + ((axes_data[0] == 1 && axes_data[1] == 2) || (axes_data[0] == 2 && axes_data[1] == 1))) + { + const int32_t batches = input_shape.dim(0); + const int32_t input_height = input_shape.dim(1); + const int32_t input_width = input_shape.dim(2); + const int32_t depth = input_shape.dim(3); + assert(output_shape.num_dims() == 4); + assert(output_shape.dim(0) == batches); + assert(output_shape.dim(1) == 1); + assert(output_shape.dim(2) == 1); + assert(output_shape.dim(3) == depth); + + const double real_multiplier = + static_cast<double>(input()->scale()) / static_cast<double>(output()->scale()); + + int32_t output_multiplier{}; + int output_shift{}; + quantizeMultiplier(real_multiplier, &output_multiplier, &output_shift); + + const int32_t num_elements_in_axes = input_height * input_width; + + for (int32_t batch = 0; batch < batches; ++batch) + { + for (int32_t c = 0; c < depth; ++c) + { + int32_t acc = 0; + for (int32_t in_y = 0; in_y < input_height; ++in_y) + { + for (int32_t in_x = 0; in_x < input_width; ++in_x) + { + acc += input_data[calcOffset(input_shape, batch, in_y, in_x, c)]; + } + } + int32_t scaled_acc = + tflite::MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); + // Divide by the number of elements rounding to the nearest integer. + scaled_acc = scaled_acc > 0 + ? 
(scaled_acc + num_elements_in_axes / 2) / num_elements_in_axes + : (scaled_acc - num_elements_in_axes / 2) / num_elements_in_axes; + + scaled_acc = std::max(scaled_acc, output_min); + scaled_acc = std::min(scaled_acc, output_max); + + output_data[calcOffset(output_shape, batch, 0, 0, c)] = scaled_acc; + } + } + } + else + { + throw std::runtime_error("Unsupported configuration."); + } +} + } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Mean.h b/compiler/luci-interpreter/src/kernels/Mean.h index 9cc793c72..1cc046894 100644 --- a/compiler/luci-interpreter/src/kernels/Mean.h +++ b/compiler/luci-interpreter/src/kernels/Mean.h @@ -42,6 +42,7 @@ public: private: void evalFloat() const; void evalQuantized() const; + void evalQuantizedS16() const; private: std::unique_ptr<Tensor> _temp_index; diff --git a/compiler/luci-interpreter/src/kernels/Mean.test.cpp b/compiler/luci-interpreter/src/kernels/Mean.test.cpp index f4e411ca4..e81d2ad5f 100644 --- a/compiler/luci-interpreter/src/kernels/Mean.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Mean.test.cpp @@ -47,8 +47,7 @@ TEST(MeanTest, FloatKeepDims) std::vector<float> ref_output_data{10.5, 12.5, 14.5}; std::initializer_list<int32_t> ref_output_shape{1, 3, 1}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } @@ -72,8 +71,7 @@ TEST(MeanTest, FloatKeepDims4DMean) std::vector<float> ref_output_data{6, 7, 18, 19}; std::initializer_list<int32_t> ref_output_shape{2, 1, 1, 2}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } @@ -97,8 +95,7 @@ TEST(MeanTest, FloatNotKeepDims) std::vector<float> ref_output_data{12, 13}; std::initializer_list<int32_t> ref_output_shape{2}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } @@ -109,12 +106,10 @@ TEST(MeanTest, Uint8KeepDims) std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f); std::vector<int32_t> axis_data{1}; - Tensor input_tensor{DataType::U8, {3, 2}, {{quant_param.first}, {quant_param.second}}, ""}; + Tensor input_tensor = + makeInputTensor<DataType::U8>({3, 2}, quant_param.first, quant_param.second, input_data); Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); - std::vector<uint8_t> quantize_input = - quantize<uint8_t>(input_data, quant_param.first, quant_param.second); - input_tensor.writeData(quantize_input.data(), quantize_input.size() * sizeof(uint8_t)); ReducerParams params{}; params.keep_dims = true; @@ -125,9 +120,8 @@ TEST(MeanTest, Uint8KeepDims) std::vector<float> ref_output_data{0.3, 0.35, 0.55}; std::initializer_list<int32_t> ref_output_shape{3, 1}; - EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(), - 
output_tensor.zero_point()), - ElementsAreArray(ArrayFloatNear(ref_output_data, kQuantizedTolerance))); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kQuantizedTolerance)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } @@ -138,12 +132,10 @@ TEST(MeanTest, Uint8NotKeepDims) std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f); std::vector<int32_t> axis_data{1}; - Tensor input_tensor{DataType::U8, {1, 3, 2}, {{quant_param.first}, {quant_param.second}}, ""}; + Tensor input_tensor = + makeInputTensor<DataType::U8>({1, 3, 2}, quant_param.first, quant_param.second, input_data); Tensor axis_tensor = makeInputTensor<DataType::S32>({1}, axis_data); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); - std::vector<uint8_t> quantize_input = - quantize<uint8_t>(input_data, quant_param.first, quant_param.second); - input_tensor.writeData(quantize_input.data(), quantize_input.size() * sizeof(uint8_t)); ReducerParams params{}; params.keep_dims = false; @@ -154,12 +146,34 @@ TEST(MeanTest, Uint8NotKeepDims) std::vector<float> ref_output_data{0.4, 0.4}; std::initializer_list<int32_t> ref_output_shape{1, 2}; - EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(), - output_tensor.zero_point()), - ElementsAreArray(ArrayFloatNear(ref_output_data, kQuantizedTolerance))); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kQuantizedTolerance)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } +TEST(MeanTest, SInt16KeepDims4D) +{ + std::vector<float> input_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, + 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, + 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; + std::vector<int32_t> axes_data{1, 2}; + std::vector<float> ref_output_data{6, 7, 18, 19}; + + Tensor input_tensor = makeInputTensor<DataType::S16>({2, 2, 3, 2}, 0.25, 0, input_data); + Tensor axes_tensor = makeInputTensor<DataType::S32>({2}, axes_data); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.2, 0); + + ReducerParams params{}; + params.keep_dims = true; + + Mean kernel(&input_tensor, &axes_tensor, &output_tensor, params); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 1, 1, 2})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + } // namespace } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Minimum.cpp b/compiler/luci-interpreter/src/kernels/Minimum.cpp new file mode 100644 index 000000000..5eb13455e --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Minimum.cpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2018 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Minimum.h" + +#include "kernels/Utils.h" + +#include "kernels/BinaryOpCommon.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +Minimum::Minimum(const Tensor *input1, const Tensor *input2, Tensor *output) + : Kernel({input1, input2}, {output}) +{ +} + +void Minimum::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()) + LUCI_INTERPRETER_CHECK(input1()->element_type() == output()->element_type()) + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void Minimum::execute() const +{ + switch (input1()->element_type()) + { + case DataType::FLOAT32: + evalMinimum<float>(); + break; + case DataType::U8: + evalMinimum<uint8_t>(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +template <typename T> inline void Minimum::evalMinimum() const +{ + BinaryOpBroadcastSlow(getTensorShape(input1()), getTensorData<T>(input1()), + getTensorShape(input2()), getTensorData<T>(input2()), + getTensorShape(output()), getTensorData<T>(output()), + [](T x, T y) { return std::min(x, y); }); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Minimum.h b/compiler/luci-interpreter/src/kernels/Minimum.h new file mode 100644 index 000000000..5ff4035b4 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Minimum.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_MINIMUM_H +#define LUCI_INTERPRETER_KERNELS_MINIMUM_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Minimum : public Kernel +{ +public: + Minimum(const Tensor *input1, const Tensor *input2, Tensor *output); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template <typename T> inline void evalMinimum() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_MINIMUM_H diff --git a/compiler/luci-interpreter/src/kernels/Minimum.test.cpp b/compiler/luci-interpreter/src/kernels/Minimum.test.cpp new file mode 100644 index 000000000..b6420dd9b --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Minimum.test.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Minimum.h" +#include "kernels/TestUtils.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +TEST(MinimumTest, Float) +{ + Shape input_shape{3, 1, 2}; + std::vector<float> input_data1{1.0, 0.0, -1.0, 11.0, -2.0, -1.44}; + std::vector<float> input_data2{-1.0, 0.0, 1.0, 12.0, -3.0, -1.43}; + Tensor input_tensor1 = makeInputTensor<DataType::FLOAT32>(input_shape, input_data1); + Tensor input_tensor2 = makeInputTensor<DataType::FLOAT32>(input_shape, input_data2); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Minimum kernel(&input_tensor1, &input_tensor2, &output_tensor); + kernel.configure(); + kernel.execute(); + + std::vector<float> ref_output_data{-1.0, 0.0, -1.0, 11.0, -3.0, -1.44}; + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(MinimumTest, Uint8) +{ + Shape input_shape{3, 1, 2}; + std::vector<uint8_t> input_data1{1, 0, 2, 11, 2, 23}; + std::vector<uint8_t> input_data2{0, 0, 1, 12, 255, 1}; + Tensor input_tensor1 = makeInputTensor<DataType::U8>(input_shape, input_data1); + Tensor input_tensor2 = makeInputTensor<DataType::U8>(input_shape, input_data2); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Minimum kernel(&input_tensor1, &input_tensor2, &output_tensor); + kernel.configure(); + kernel.execute(); + + std::vector<int32_t> ref_output_shape{2, 4}; + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray({0, 0, 1, 11, 2, 1})); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Mul.test.cpp b/compiler/luci-interpreter/src/kernels/Mul.test.cpp index f2255ac3f..fbda3bece 100644 --- a/compiler/luci-interpreter/src/kernels/Mul.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Mul.test.cpp @@ -56,8 +56,7 @@ TEST(MulTest, Float) kernel.configure(); kernel.execute(); - EXPECT_THAT(extractTensorData<float>(output_tensor), - ::testing::ElementsAreArray(ArrayFloatNear(test_outputs[i], 0.0001f))) + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f)) << "With shape number " << i; } // Re-run with exchanged inputs. @@ -74,8 +73,7 @@ TEST(MulTest, Float) kernel.configure(); kernel.execute(); - EXPECT_THAT(extractTensorData<float>(output_tensor), - ::testing::ElementsAreArray(ArrayFloatNear(test_outputs[i], 0.0001f))) + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f)) << "With shape number " << i; } } diff --git a/compiler/luci-interpreter/src/kernels/NotEqual.cpp b/compiler/luci-interpreter/src/kernels/NotEqual.cpp new file mode 100644 index 000000000..cd2f6c2c1 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/NotEqual.cpp @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/NotEqual.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/comparisons.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +NotEqual::NotEqual(const Tensor *x, const Tensor *y, Tensor *output) : Kernel({x, y}, {output}) {} + +void NotEqual::configure() +{ + LUCI_INTERPRETER_CHECK(x()->element_type() == y()->element_type()); + LUCI_INTERPRETER_CHECK(output()->element_type() == DataType::BOOL); + + if (x()->element_type() == DataType::U8) + { + quantizeMultiplierSmallerThanOneExp(x()->scale(), &_x_multiplier, &_x_shift); + quantizeMultiplierSmallerThanOneExp(y()->scale(), &_y_multiplier, &_y_shift); + } + output()->resize(calculateShapeForBroadcast(x()->shape(), y()->shape())); +} + +void NotEqual::execute() const +{ + switch (x()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void NotEqual::evalFloat() const +{ + const auto x_data = getTensorData<float>(x()); + const auto y_data = getTensorData<float>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowNotEqual(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::NotEqual(op_params, getTensorShape(x()), x_data, getTensorShape(y()), + y_data, getTensorShape(output()), output_data); + } +} + +void NotEqual::evalQuantized() const +{ + const auto x_data = getTensorData<uint8_t>(x()); + const auto y_data = getTensorData<uint8_t>(y()); + auto output_data = getTensorData<bool>(output()); + + tflite::ComparisonParams op_params; + op_params.left_shift = 8; + op_params.input1_offset = -x()->zero_point(); // Note the '-' + op_params.input1_shift = _x_shift; + op_params.input1_multiplier = _x_multiplier; + op_params.input2_offset = -y()->zero_point(); // Note the '-' + op_params.input2_shift = _y_shift; + op_params.input2_multiplier = _y_multiplier; + op_params.is_broadcast = x()->shape() != y()->shape(); + + if (op_params.is_broadcast) + { + tflite::reference_ops::Broadcast4DSlowNotEqualWithScaling( + op_params, getTensorShape(x()), x_data, getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } + else + { + tflite::reference_ops::NotEqualWithScaling(op_params, getTensorShape(x()), x_data, + getTensorShape(y()), y_data, + getTensorShape(output()), output_data); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/NotEqual.h b/compiler/luci-interpreter/src/kernels/NotEqual.h new file mode 100644 index 000000000..d729c6c14 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/NotEqual.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_NOT_EQUAL_H +#define LUCI_INTERPRETER_KERNELS_NOT_EQUAL_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class NotEqual : public Kernel +{ +public: + NotEqual(const Tensor *x, const Tensor *y, Tensor *output); + + const Tensor *x() const { return _inputs[0]; } + const Tensor *y() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + +private: + int32_t _x_multiplier = 0; + int32_t _x_shift = 0; + int32_t _y_multiplier = 0; + int32_t _y_shift = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_NOT_EQUAL_H diff --git a/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp b/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp new file mode 100644 index 000000000..8c8712371 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/NotEqual.test.cpp @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/NotEqual.h" +#include "kernels/TestUtils.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +TEST(NotEqualTest, FloatSimple) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + -1, 0, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, false, true, // Row 1 + true, false, true, // Row 2 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, x_data); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, y_data); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST(NotEqualTest, FloatBroardcast) +{ + std::vector<float> x_data{ + 0.5, 0.7, 0.9, // Row 1 + 1, 0, -1, // Row 2 + -1, 0, 1, // Row 3 + 0.9, 0.7, 0.5, // Row 4 + }; + + std::vector<float> y_data{ + 0.9, 0.7, 0.5, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, false, true, // Row 1 + true, true, true, // Row 2 + true, true, true, // Row 3 + false, false, false, // Row 4 + }; + + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({4, 3}, x_data); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1, 3}, y_data); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({4, 3})); +} + +// Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
+const float F_MIN = -128.0 / 128.0; +const float F_MAX = 127.0 / 128.0; + +TEST(NotEqualTest, Uint8Quantized) +{ + std::vector<float> x_data{ + 0.5, 0.5, 0.7, 0.9, // Row 1 + 1, 0, 0.05, -1, // Row 2 + }; + + std::vector<float> y_data{ + 0.9, 0.5, 0.55, 0.5, // Row 1 + -1, 0, 0.05, 1, // Row 2 + }; + + std::vector<bool> ref_output_data{ + true, false, true, true, // Row 1 + true, false, false, true, // Row 2 + }; + + std::pair<float, int32_t> x_quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, x_quant_param.first, + x_quant_param.second, x_data); + + std::pair<float, int32_t> y_quant_param = quantizationParams<uint8_t>(F_MIN * 2, F_MAX * 2); + Tensor y_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, y_quant_param.first, + y_quant_param.second, y_data); + + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST(NotEqualTest, Uint8QuantizedBroadcast) +{ + std::vector<float> x_data{ + 0.4, -0.8, 0.7, 0.3, // Row 1 + -0.5, 0.1, 0, 0.5, // Row 2 + 1, 0, 0.05, -1, // Row 3 + -1, 0.05, 0, 1, // Row 4 + }; + + std::vector<float> y_data{ + -1, 0.05, 0, 1, // Row 1 + }; + + std::vector<bool> ref_output_data{ + true, true, true, true, // Row 1 + true, true, false, true, // Row 2 + true, true, true, true, // Row 3 + false, false, false, false, // Row 4 + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(F_MIN, F_MAX); + Tensor x_tensor = + makeInputTensor<DataType::U8>({1, 4, 4, 1}, quant_param.first, quant_param.second, x_data); + Tensor y_tensor = + makeInputTensor<DataType::U8>({1, 1, 4, 1}, quant_param.first, quant_param.second, y_data); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 4, 1})); + EXPECT_THAT(extractTensorData<bool>(output_tensor), ::testing::ElementsAreArray(ref_output_data)); +} + +TEST(NotEqualTest, Input_Type_Mismatch_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor y_tensor = makeInputTensor<DataType::U8>({1}, {1}); + Tensor output_tensor = makeOutputTensor(DataType::BOOL); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(NotEqualTest, Input_Output_Type_NEG) +{ + Tensor x_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor y_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + NotEqual kernel(&x_tensor, &y_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Pad.test.cpp b/compiler/luci-interpreter/src/kernels/Pad.test.cpp index 15fcd0da3..4bee07629 100644 --- a/compiler/luci-interpreter/src/kernels/Pad.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Pad.test.cpp @@ -34,12 +34,10 @@ TEST(Pad, Uint8) std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f); std::vector<float> input_data{-0.8, 0.2, 0.9, 0.7, 0.1, -0.3}; 
std::vector<int32_t> paddings_data{0, 0, 0, 2, 1, 3, 0, 0}; - Tensor input_tensor{DataType::U8, {1, 2, 3, 1}, {{quant_param.first}, {quant_param.second}}, ""}; + Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 3, 1}, quant_param.first, + quant_param.second, input_data); Tensor paddings_tensor = makeInputTensor<DataType::S32>({4, 2}, paddings_data); Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); - std::vector<uint8_t> quantize_input = - quantize<uint8_t>(input_data, quant_param.first, quant_param.second); - input_tensor.writeData(quantize_input.data(), quantize_input.size() * sizeof(uint8_t)); Pad kernel(&input_tensor, &paddings_tensor, &output_tensor); kernel.configure(); @@ -47,9 +45,8 @@ TEST(Pad, Uint8) std::vector<float> ref_output_data{0, -0.8, 0.2, 0.9, 0, 0, 0, 0, 0.7, 0.1, -0.3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - EXPECT_THAT(dequantize(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(), - output_tensor.zero_point()), - ElementsAreArray(ArrayFloatNear(ref_output_data, kQuantizedTolerance))); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kQuantizedTolerance)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 4, 7, 1})); } @@ -69,8 +66,7 @@ TEST(Pad, Float) 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0, 0, 0, 4, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; std::initializer_list<int32_t> ref_output_shape{2, 4, 6, 1}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } diff --git a/compiler/luci-interpreter/src/kernels/Pow.cpp b/compiler/luci-interpreter/src/kernels/Pow.cpp new file mode 100644 index 000000000..afc10b80e --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Pow.cpp @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Pow.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> + +#include <stdexcept> + +namespace luci_interpreter +{ +namespace kernels +{ + +Pow::Pow(const Tensor *input1, const Tensor *input2, Tensor *output) + : Kernel({input1, input2}, {output}) +{ +} + +void Pow::configure() +{ + LUCI_INTERPRETER_CHECK(input1()->element_type() == input2()->element_type()); + + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void Pow::execute() const +{ + switch (input1()->element_type()) + { + case DataType::FLOAT32: + eval<float>(); + break; + case DataType::S32: + eval<int32_t>(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +template <typename T> void Pow::eval() const +{ + tflite::ArithmeticParams params{}; + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), &params); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastPow4DSlow(getTensorShape(input1()), getTensorData<T>(input1()), + getTensorShape(input2()), getTensorData<T>(input2()), + getTensorShape(output()), getTensorData<T>(output())); + } + else + { + tflite::reference_ops::Pow(getTensorShape(input1()), getTensorData<T>(input1()), + getTensorShape(input2()), getTensorData<T>(input2()), + getTensorShape(output()), getTensorData<T>(output())); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Pow.h b/compiler/luci-interpreter/src/kernels/Pow.h new file mode 100644 index 000000000..8ff865e40 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Pow.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_POW_H +#define LUCI_INTERPRETER_KERNELS_POW_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Pow : public Kernel +{ +public: + Pow(const Tensor *input1, const Tensor *input2, Tensor *output); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + template <typename T> void eval() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_POW_H diff --git a/compiler/luci-interpreter/src/kernels/Pow.test.cpp b/compiler/luci-interpreter/src/kernels/Pow.test.cpp new file mode 100644 index 000000000..69d8946c8 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Pow.test.cpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Pow.h" +#include "kernels/TestUtils.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +TEST(PowTest, SimplePow) +{ + std::initializer_list<int32_t> base_shape = {1, 1, 3, 2}; + + std::vector<float> input1_data{0.3f, 2.3f, 0.9f, 0.5f, 0.8f, 1.1f}; + std::vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; + std::vector<float> test_outputs{0.786f, 1.2838f, 1.043f, 0.7071f, 0.8f, 1.08956f}; + + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data); + Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input2_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(base_shape)); +} + +TEST(PowTest, FloatBroadcastPow) +{ + std::initializer_list<int32_t> input1_shape = {1, 3}; + std::initializer_list<int32_t> input2_shape = {3, 1}; + + std::vector<float> input1_data{0.3f, 2.3f, 0.9f}; + std::vector<float> input2_data{0.2f, 0.3f, 0.4f}; + std::vector<float> test_outputs{0.786f, 1.18126f, 0.9791f, 0.6968f, 1.28386f, + 0.96888f, 0.6178f, 1.3953f, 0.9587f}; + + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(input1_shape, input1_data); + Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(input2_shape, input2_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs, 0.0001f)); +} + +TEST(PowTest, IntPow) +{ + std::initializer_list<int32_t> base_shape = {1, 3}; + + std::vector<int32_t> input_data{2, 3, 4}; + std::vector<int32_t> test_outputs{4, 27, 256}; + + Tensor input1_tensor = makeInputTensor<DataType::S32>(base_shape, input_data); + Tensor input2_tensor = makeInputTensor<DataType::S32>(base_shape, input_data); + Tensor output_tensor = makeOutputTensor(DataType::S32); + + Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorData<int32_t>(output_tensor), ::testing::ElementsAreArray(test_outputs)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(base_shape)); +} + +TEST(PowTest, Input_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.0f}); + Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {4}); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Pow kernel(&input1_tensor, &input2_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Prelu.cpp b/compiler/luci-interpreter/src/kernels/Prelu.cpp new file mode 100644 index 000000000..e658d87b5 --- 
/dev/null +++ b/compiler/luci-interpreter/src/kernels/Prelu.cpp @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Prelu.h" + +#include "kernels/BinaryOpCommon.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Prelu::Prelu(const Tensor *input, const Tensor *alpha, Tensor *output) + : Kernel({input, alpha}, {output}) +{ +} + +void Prelu::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(alpha()->element_type() == output()->element_type()); + + if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16) + { + if (input()->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && alpha()->zero_point() == 0 && + output()->zero_point() == 0); + } + double alpha_multiplier = input()->scale() * alpha()->scale() / output()->scale(); + quantizeMultiplier(alpha_multiplier, &_output_multiplier_alpha, &_output_shift_alpha); + double identity_multiplier = input()->scale() / output()->scale(); + quantizeMultiplier(identity_multiplier, &_output_multiplier_identity, &_output_shift_identity); + } + output()->resize(calculateShapeForBroadcast(input()->shape(), alpha()->shape())); +} + +void Prelu::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + case DataType::S16: + evalQuantizedS16(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Prelu::evalFloat() const +{ + const auto input_data = getTensorData<float>(input()); + const auto alpha_data = getTensorData<float>(alpha()); + const auto size = getTensorShape(input()).FlatSize(); + auto output_data = getTensorData<float>(output()); + + auto PreluFunc = [](float input, float alpha) { return input >= 0.0 ? input : input * alpha; }; + + if (input()->shape() != alpha()->shape()) + { + tflite::reference_ops::BroadcastBinaryFunction4DSlow<float, float, float>( + getTensorShape(input()), getTensorData<float>(input()), getTensorShape(alpha()), + getTensorData<float>(alpha()), getTensorShape(output()), getTensorData<float>(output()), + PreluFunc); + } + else + { + for (auto i = decltype(size){0}; i < size; ++i) + { + if (input_data[i] >= 0) + output_data[i] = input_data[i]; + else + output_data[i] = input_data[i] * alpha_data[i]; + } + } +} + +void Prelu::evalQuantized() const +{ + tflite::PreluParams op_params{}; + + op_params.input_offset = -input()->zero_point(); // Note the '-'. + op_params.alpha_offset = -alpha()->zero_point(); // Note the '-'. 
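+ // [Editorial note, not part of the upstream patch] The input and alpha offsets are stored
+ // negated because the TFLite reference PRelu kernel adds them back, i.e. it operates on
+ // (input_val + input_offset) == (input_val - zero_point) before applying the quantized
+ // multipliers; the output offset below is added after scaling, so it keeps its sign.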
+ op_params.output_offset = output()->zero_point(); + op_params.output_shift_1 = _output_shift_identity; + op_params.output_multiplier_1 = _output_multiplier_identity; + op_params.output_shift_2 = _output_shift_alpha; + op_params.output_multiplier_2 = _output_multiplier_alpha; + + if (input()->shape() != alpha()->shape()) + { + tflite::reference_ops::BroadcastPrelu4DSlow( + op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), + getTensorShape(alpha()), getTensorData<uint8_t>(alpha()), getTensorShape(output()), + getTensorData<uint8_t>(output())); + } + else + { + tflite::reference_ops::Prelu<uint8_t>(op_params, getTensorShape(input()), + getTensorData<uint8_t>(input()), getTensorShape(alpha()), + getTensorData<uint8_t>(alpha()), getTensorShape(output()), + getTensorData<uint8_t>(output())); + } +} + +void Prelu::evalQuantizedS16() const +{ + constexpr int32_t quantized_min = std::numeric_limits<int16_t>::min(); + constexpr int32_t quantized_max = std::numeric_limits<int16_t>::max(); + + auto fn = [this, quantized_min, quantized_max](int16_t input_val, int16_t alpha_val) { + const int32_t output_val = + input_val >= 0 + ? tflite::MultiplyByQuantizedMultiplier(input_val, _output_multiplier_identity, + _output_shift_identity) + : tflite::MultiplyByQuantizedMultiplier(input_val * alpha_val, _output_multiplier_alpha, + _output_shift_alpha); + const int32_t clamped_output = std::min(quantized_max, std::max(quantized_min, output_val)); + return static_cast<int16_t>(clamped_output); + }; + + BinaryOpBroadcastSlow(getTensorShape(input()), getTensorData<int16_t>(input()), + getTensorShape(alpha()), getTensorData<int16_t>(alpha()), + getTensorShape(output()), getTensorData<int16_t>(output()), fn); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Prelu.h b/compiler/luci-interpreter/src/kernels/Prelu.h new file mode 100644 index 000000000..c7911a63f --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Prelu.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_PRELU_H +#define LUCI_INTERPRETER_KERNELS_PRELU_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Prelu : public Kernel +{ +public: + Prelu(const Tensor *input, const Tensor *alpha, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *alpha() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + void evalQuantizedS16() const; + +private: + int32_t _output_multiplier_alpha = 0; + int32_t _output_shift_alpha = 0; + int32_t _output_multiplier_identity = 0; + int32_t _output_shift_identity = 0; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_PRELU_H diff --git a/compiler/luci-interpreter/src/kernels/Prelu.test.cpp b/compiler/luci-interpreter/src/kernels/Prelu.test.cpp new file mode 100644 index 000000000..30702c826 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Prelu.test.cpp @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Prelu.h" +#include "kernels/TestUtils.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> alpha_shape, + std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data, + std::initializer_list<T> alpha_data, std::initializer_list<T> output_data) +{ + constexpr DataType element_type = getElementType<T>(); + Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data); + Tensor alpha_tensor = makeInputTensor<element_type>(alpha_shape, alpha_data); + Tensor output_tensor = makeOutputTensor(element_type); + + Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorData<T>(output_tensor), ::testing::ElementsAreArray(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); +} + +TEST(PreluTest, FloatSimple) +{ + Check<float>(/*input_shape=*/{2, 3}, /*alpha_shape=*/{2, 3}, + /*output_shape=*/{2, 3}, + /*input_data=*/ + { + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -1.0f, -2.0f, // Row 2 + }, + /*alpha_data=*/ + { + 0.0f, 0.5f, 0.1f, // Row 1 + 0.0f, 0.5f, 0.1f, // Row 2 + }, + /*output_data=*/ + { + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -0.5f, -0.2f, // Row 2 + }); + + SUCCEED(); +} + +TEST(PreluTest, FloatBroadcast) +{ + Check<float>(/*input_shape=*/{1, 2, 2, 3}, /*alpha_shape=*/{1, 1, 3}, + /*output_shape=*/{1, 2, 2, 3}, + /*input_data=*/ + { + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 1.0f, 1.0f, 1.0f, // Row 1, Column 2 + -1.0f, -1.0f, -1.0f, // Row 2, Column 1 + -2.0f, -2.0f, -2.0f, // Row 2, Column 2 + }, + /*alpha_data=*/ + {0.0f, 1.0f, 2.0f}, + /*output_data=*/ + { + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 1.0f, 1.0f, 1.0f, // Row 1, Column 2 + 0.0f, -1.0f, -2.0f, // Row 2, Column 1 + 0.0f, -2.0f, -4.0f, // Row 2, Column 2 + }); + + SUCCEED(); +} + +float GetTolerance(float min, float max) { return (max - min) / 255.0; } + +TEST(PreluTest, Uint8Simple) +{ + std::vector<float> input_data{-0.8f, 0.2f, 0.9f, 0.7f, 0.1f, -0.4f}; + std::vector<float> alpha_data{0.5f, 0.5f, 0.5f, 0.25f, 1.0f, 0.25f}; + std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, 0.7f, 0.1f, -0.1f}; + + float kQuantizedTolerance = GetTolerance(-1.0, 1.0); + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-1.0f, 1.0f); + + Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 3, 1}, quant_param.first, + quant_param.second, input_data); + Tensor alpha_tensor = makeInputTensor<DataType::U8>({1, 2, 3, 1}, quant_param.first, + quant_param.second, alpha_data); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 3, 1})); + + SUCCEED(); +} + +TEST(PreluTest, Uint8Broadcast) +{ + std::vector<float> input_data{ + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 0.5f, 0.5f, 0.5f, // Row 1, Column 2 + -1.0f, -1.0f, -1.0f, // Row 2, Column 1 + -0.25f, -0.25f, -0.25f, // Row 2, Column 2 + }; + std::vector<float> alpha_data{0.0f, 0.5f, -0.5f}; + std::vector<float> ref_output_data{ + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 0.5f, 0.5f, 0.5f, // Row 1, 
Column 2 + 0.0f, -0.5f, 0.5f, // Row 2, Column 1 + 0.0f, -0.125f, 0.125f // Row 2, Column 2 + }; + std::vector<float> ref_quant_output_data{ + 128, 128, 128, // Row 1, Column 1 + 192, 192, 192, // Row 1, Column 2 + 128, 64, 192, // Row 2, Column 1 + 128, 112, 144 // Row 2, Column 2 + }; + float kQuantizedTolerance = 2 * (1. / 256); + const float kMin = -1; + const float kMax = 127.f / 128.f; + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(kMin, kMax); + + Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 2, 3}, quant_param.first, + quant_param.second, input_data); + Tensor alpha_tensor = + makeInputTensor<DataType::U8>({1, 1, 3}, quant_param.first, quant_param.second, alpha_data); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(ref_output_data, kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 3})); + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray(ref_quant_output_data)); +} + +TEST(PreluTest, SInt16Simple) +{ + std::vector<float> input_data{-0.8f, 0.2f, 0.9f, 0.7f, 0.1f, -0.4f}; + std::vector<float> alpha_data{0.5f, 0.5f, 0.5f, 0.25f, 1.0f, 0.25f}; + std::vector<float> ref_output_data{-0.4f, 0.2f, 0.9f, 0.7f, 0.1f, -0.1f}; + + Tensor input_tensor = makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, input_data); + Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 2, 3, 1}, 0.1, 0, alpha_data); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.1, 0); + + Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 3, 1})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(PreluTest, SInt16Broadcast) +{ + std::vector<float> input_data{ + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 0.5f, 0.5f, 0.5f, // Row 1, Column 2 + -1.0f, -1.0f, -1.0f, // Row 2, Column 1 + -0.25f, -0.25f, -0.25f, // Row 2, Column 2 + }; + std::vector<float> alpha_data{0.0f, 0.5f, -0.5f}; + std::vector<float> ref_output_data{ + 0.0f, 0.0f, 0.0f, // Row 1, Column 1 + 0.5f, 0.5f, 0.5f, // Row 1, Column 2 + 0.0f, -0.5f, 0.5f, // Row 2, Column 1 + 0.0f, -0.125f, 0.125f // Row 2, Column 2 + }; + + Tensor input_tensor = makeInputTensor<DataType::S16>({1, 2, 2, 3}, 0.01, 0, input_data); + Tensor alpha_tensor = makeInputTensor<DataType::S16>({1, 1, 3}, 0.1, 0, alpha_data); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.001, 0); + + Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 2, 3})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(PreluTest, Input_Output_Type_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor alpha_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(PreluTest, Input_Alpha_Type_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor 
alpha_tensor = makeInputTensor<DataType::U8>({1}, {1}); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(PreluTest, Invalid_Input_Type_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}); + Tensor alpha_tensor = makeInputTensor<DataType::S64>({1}, {1}); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + Prelu kernel(&input_tensor, &alpha_tensor, &output_tensor); + kernel.configure(); + EXPECT_ANY_THROW(kernel.execute()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Relu.cpp b/compiler/luci-interpreter/src/kernels/Relu.cpp new file mode 100644 index 000000000..a2e02d708 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Relu.cpp @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Relu.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Relu::Relu(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Relu::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + if (input()->element_type() == DataType::S16) + { + LUCI_INTERPRETER_CHECK(input()->zero_point() == 0 && output()->zero_point() == 0); + } + + if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S16) + { + double multiplier = input()->scale() / output()->scale(); + quantizeMultiplier(multiplier, &_output_multiplier, &_output_shift); + } + output()->resize(input()->shape()); +} + +void Relu::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + case DataType::S16: + evalQuantizedS16(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Relu::evalFloat() const +{ + const auto input_data = getTensorData<float>(input()); + const auto input_shape = getTensorShape(input()); + auto output_data = getTensorData<float>(output()); + auto output_shape = getTensorShape(output()); + + tflite::optimized_ops::Relu(input_shape, input_data, output_shape, output_data); +} + +void Relu::evalQuantized() const +{ + tflite::ReluParams params; + params.input_offset = input()->zero_point(); + params.output_offset = output()->zero_point(); + params.output_multiplier = _output_multiplier; + params.output_shift = _output_shift; + + params.quantized_activation_min = + std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset); + params.quantized_activation_max = static_cast<int32_t>(std::numeric_limits<uint8_t>::max()); + + tflite::optimized_ops::ReluX(params, getTensorShape(input()), 
getTensorData<uint8_t>(input()), + getTensorShape(output()), getTensorData<uint8_t>(output())); +} + +void Relu::evalQuantizedS16() const +{ + const auto *input_data = getTensorData<int16_t>(input()); + auto *output_data = getTensorData<int16_t>(output()); + + constexpr int32_t output_min = 0; + constexpr int32_t output_max = std::numeric_limits<int16_t>::max(); + + const int32_t num_elements = input()->shape().num_elements(); + + for (int32_t i = 0; i < num_elements; ++i) + { + const int32_t input_val = input_data[i]; + int32_t output_val = + tflite::MultiplyByQuantizedMultiplier(input_val, _output_multiplier, _output_shift); + output_val = std::max(output_val, output_min); + output_val = std::min(output_val, output_max); + output_data[i] = static_cast<int16_t>(output_val); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Relu.h b/compiler/luci-interpreter/src/kernels/Relu.h new file mode 100644 index 000000000..b813f0cdf --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Relu.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_RELU_H +#define LUCI_INTERPRETER_KERNELS_RELU_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Relu : public Kernel +{ +public: + Relu(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + void evalQuantizedS16() const; + +private: + int32_t _output_multiplier{0}; + int32_t _output_shift{0}; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_RELU_H diff --git a/compiler/luci-interpreter/src/kernels/Relu.test.cpp b/compiler/luci-interpreter/src/kernels/Relu.test.cpp new file mode 100644 index 000000000..cabefa733 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Relu.test.cpp @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Relu.h" +#include "kernels/TestUtils.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +TEST(ReluTest, FloatSimple) +{ + std::vector<float> input_data{ + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, -1.0f, -2.0f, // Row 2 + }; + + std::vector<float> ref_output_data{ + 0.0f, 1.0f, 3.0f, // Row 1 + 1.0f, 0.0f, 0.0f, // Row 2 + }; + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Relu kernel(&input_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST(ReluTest, Uint8Quantized) +{ + std::vector<float> input_data{ + 0, -6, 2, 4, // + 3, -2, 7, 1, // + }; + // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. + const float f_min = (-128.0 / 128.0) * 8; + const float f_max = (127.0 / 128.0) * 8; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(f_min, f_max); + Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, + quant_param.second, input_data); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + Relu kernel(&input_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray({128, 128, 160, 192, 176, 128, 240, 144})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0, 0, 2, 4, 3, 0, 7, 1})); +} + +TEST(ReluTest, Uint8Requantized) +{ + std::vector<float> input_data{ + 0, -6, 2, 4, // + 3, -2, 7, 1, // + }; + + // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
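+ // [Editorial note, not part of the upstream patch] Assuming quantizationParams() picks
+ // scale = (max - min) / 255, the input range gives a step of exactly 1/16 (zero point 128)
+ // and the output range a step of exactly 1/32 (zero point 0), so e.g. the value 2.0 is stored
+ // as input code 160 and requantized to output code 64 with no rounding error, which is why
+ // exact uint8 codes can be compared below.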
+ const float in_min = (-128.0 / 128.0) * 8; + const float in_max = (127.0 / 128.0) * 8; + const float out_min = (0.0 / 256.0) * 8; + const float out_max = (255.0 / 256.0) * 8; + + std::pair<float, int32_t> quant_input = quantizationParams<uint8_t>(in_min, in_max); + Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_input.first, + quant_input.second, input_data); + + std::pair<float, int32_t> quant_output = quantizationParams<uint8_t>(out_min, out_max); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_output.first, quant_output.second); + + Relu kernel(&input_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray({0, 0, 64, 128, 96, 0, 224, 32})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear({0, 0, 2, 4, 3, 0, 7, 1})); +} + +TEST(ReluTest, SInt16) +{ + std::vector<float> input_data{ + 0, -6, 2, 4, // + 3, -2, 7, 1, // + }; + std::vector<float> ref_output_data{ + 0, 0, 2, 4, // + 3, 0, 7, 1, // + }; + + Tensor input_tensor = makeInputTensor<DataType::S16>({1, 2, 4, 1}, 0.5, 0, input_data); + Tensor output_tensor = makeOutputTensor(DataType::S16, 0.25, 0); + + Relu kernel(&input_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} + +TEST(ReluTest, Input_Output_Type_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Relu kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(ReluTest, Invalid_Input_Type_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + Relu kernel(&input_tensor, &output_tensor); + kernel.configure(); + EXPECT_ANY_THROW(kernel.execute()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Relu6.cpp b/compiler/luci-interpreter/src/kernels/Relu6.cpp new file mode 100644 index 000000000..1046ef27b --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Relu6.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/Relu6.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +#include <stdexcept> + +namespace luci_interpreter +{ + +namespace kernels +{ + +Relu6::Relu6(const Tensor *input, Tensor *output) : Kernel({input}, {output}) {} + +void Relu6::configure() +{ + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + + if (input()->element_type() == DataType::U8) + { + double multiplier = input()->scale() / output()->scale(); + quantizeMultiplier(multiplier, &_output_multiplier, &_output_shift); + } + output()->resize(input()->shape()); +} + +void Relu6::execute() const +{ + switch (input()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Relu6::evalFloat() const +{ + const auto input_data = getTensorData<float>(input()); + const auto input_shape = getTensorShape(input()); + auto output_data = getTensorData<float>(output()); + auto output_shape = getTensorShape(output()); + + tflite::optimized_ops::Relu6(input_shape, input_data, output_shape, output_data); +} + +void Relu6::evalQuantized() const +{ + tflite::ReluParams params; + params.input_offset = input()->zero_point(); + params.output_offset = output()->zero_point(); + params.output_multiplier = _output_multiplier; + params.output_shift = _output_shift; + + params.quantized_activation_min = + std::max(static_cast<int32_t>(std::numeric_limits<uint8_t>::min()), params.output_offset); + params.quantized_activation_max = + std::min(static_cast<int32_t>(std::numeric_limits<uint8_t>::max()), + params.output_offset + static_cast<int32>(roundf(6.f / output()->scale()))); + + tflite::optimized_ops::ReluX(params, getTensorShape(input()), getTensorData<uint8_t>(input()), + getTensorShape(output()), getTensorData<uint8_t>(output())); +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Relu6.h b/compiler/luci-interpreter/src/kernels/Relu6.h new file mode 100644 index 000000000..f5030b588 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Relu6.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_RELU6_H +#define LUCI_INTERPRETER_KERNELS_RELU6_H + +#include "core/Kernel.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Relu6 : public Kernel +{ +public: + Relu6(const Tensor *input, Tensor *output); + + const Tensor *input() const { return _inputs[0]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; + +private: + int32_t _output_multiplier{0}; + int32_t _output_shift{0}; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_RELU6_H diff --git a/compiler/luci-interpreter/src/kernels/Relu6.test.cpp b/compiler/luci-interpreter/src/kernels/Relu6.test.cpp new file mode 100644 index 000000000..a7f104d85 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Relu6.test.cpp @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Relu6.h" +#include "kernels/TestUtils.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +TEST(Relu6Test, FloatSimple) +{ + std::vector<float> input_data{ + 0.0f, 1.0f, 3.0f, // Row 1 + 7.0f, -1.0f, -2.0f, // Row 2 + }; + + std::vector<float> ref_output_data{ + 0.0f, 1.0f, 3.0f, // Row 1 + 6.0f, 0.0f, 0.0f, // Row 2 + }; + + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 3}, input_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + Relu6 kernel(&input_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), + ::testing::ElementsAreArray(ref_output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({2, 3})); +} + +TEST(Relu6Test, Uint8Quantized) +{ + // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. 
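+ // [Editorial note, not part of the upstream patch] With this range the step is
+ // (f_max - f_min) / 255 = 10/128 with zero point 128 (again assuming quantizationParams()
+ // divides the range by 255), so the clamped value 6.0 maps to 6 * 12.8 + 128 = 204.8 and is
+ // rounded to the expected code 205 below; the one-step tolerance covers that rounding.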
+ const float f_min = (-128.0 / 128.0) * 10; + const float f_max = (127.0 / 128.0) * 10; + const float tolerance = (f_max - f_min) / 255.0; + + std::vector<float> input_data{ + 0, -6, 2, 8, // + -2, 3, 7, 1, // + }; + + std::pair<float, int32_t> quant_param = quantizationParams<uint8_t>(f_min, f_max); + Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_param.first, + quant_param.second, input_data); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.second); + + Relu6 kernel(&input_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray({128, 128, 154, 205, 128, 166, 205, 141})); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear({0, 0, 2, 6, 0, 3, 6, 1}, tolerance)); +} + +TEST(Relu6Test, Uint8Requantized) +{ + // Choose min / max in such a way that there are exactly 256 units to avoid rounding errors. + const float in_min = (-128.0 / 128.0) * 10; + const float in_max = (127.0 / 128.0) * 10; + const float out_min = (0.0 / 256.0) * 0; + const float out_max = (255.0 / 256.0) * 6; + const float tolerance = (in_max - in_min) / 255.0; + + std::vector<float> input_data{ + 0, -6, 2, 8, // + -2, 3, 7, 1, // + }; + + std::pair<float, int32_t> quant_input = quantizationParams<uint8_t>(in_min, in_max); + Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 4, 1}, quant_input.first, + quant_input.second, input_data); + + std::pair<float, int32_t> quant_output = quantizationParams<uint8_t>(out_min, out_max); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_output.first, quant_output.second); + + Relu6 kernel(&input_tensor, &output_tensor); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray({1, 2, 4, 1})); + EXPECT_THAT(extractTensorData<uint8_t>(output_tensor), + ::testing::ElementsAreArray({0, 0, 87, 255, 0, 127, 255, 43})); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear({0, 0, 2, 6, 0, 3, 6, 1}, tolerance)); +} + +TEST(Relu6Test, Input_Output_Type_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor output_tensor = makeOutputTensor(DataType::U8); + + Relu6 kernel(&input_tensor, &output_tensor); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(Relu6Test, Invalid_Input_Type_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::S64>({1}, {1}); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + Relu6 kernel(&input_tensor, &output_tensor); + kernel.configure(); + EXPECT_ANY_THROW(kernel.execute()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Reshape.test.cpp b/compiler/luci-interpreter/src/kernels/Reshape.test.cpp index 7255b8132..38159380f 100644 --- a/compiler/luci-interpreter/src/kernels/Reshape.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Reshape.test.cpp @@ -42,8 +42,7 @@ TEST(ReshapeTest, Regular) kernel.configure(); kernel.execute(); - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(input_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data)); } TEST(ReshapeTest, UnknownDimension) @@ -60,8 +59,7 @@ TEST(ReshapeTest, UnknownDimension) kernel.configure(); kernel.execute(); - 
EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(input_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(input_data)); } } // namespace diff --git a/compiler/luci-interpreter/src/kernels/ResizeBilinear.cpp b/compiler/luci-interpreter/src/kernels/ResizeBilinear.cpp new file mode 100644 index 000000000..9385855cf --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ResizeBilinear.cpp @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/ResizeBilinear.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter +{ +namespace kernels +{ + +ResizeBilinear::ResizeBilinear(const Tensor *input, const Tensor *size, Tensor *output, + const ResizeBilinearParams &params) + : KernelWithParams<ResizeBilinearParams>({input, size}, {output}, params) +{ +} + +void ResizeBilinear::configure() +{ + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(size()->shape().num_dims() == 1); + LUCI_INTERPRETER_CHECK(size()->element_type() == DataType::S32); + if (params().half_pixel_centers && params().align_corners) + throw std::runtime_error("If half_pixel_centers is True, align_corners must be False."); + LUCI_INTERPRETER_CHECK(size()->shape().dim(0) == 2); + Shape output_shape(4); + output_shape.dim(0) = input()->shape().dim(0); + output_shape.dim(1) = getTensorData<int32_t>(size())[0]; + output_shape.dim(2) = getTensorData<int32_t>(size())[1]; + output_shape.dim(3) = input()->shape().dim(3); + output()->resize(output_shape); +} + +void ResizeBilinear::execute() const +{ + tflite::ResizeBilinearParams op_params{}; + op_params.align_corners = params().align_corners; + op_params.half_pixel_centers = params().half_pixel_centers; + switch (output()->element_type()) + { + case DataType::FLOAT32: + tflite::optimized_ops::ResizeBilinear( + op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(size()), + getTensorData<int32_t>(size()), getTensorShape(output()), getTensorData<float>(output())); + break; + case DataType::U8: + tflite::optimized_ops::ResizeBilinear( + op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), + getTensorShape(size()), getTensorData<int32_t>(size()), getTensorShape(output()), + getTensorData<uint8_t>(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/ResizeBilinear.h b/compiler/luci-interpreter/src/kernels/ResizeBilinear.h new file mode 100644 index 000000000..b7bdc2ab7 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ResizeBilinear.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H +#define LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class ResizeBilinear : public KernelWithParams<ResizeBilinearParams> +{ +public: + ResizeBilinear(const Tensor *input, const Tensor *shape, Tensor *output, + const ResizeBilinearParams &params); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *size() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_RESIZEBILINEAR_H diff --git a/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp b/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp new file mode 100644 index 000000000..51c1359da --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ResizeBilinear.test.cpp @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/ResizeBilinear.h" +#include "kernels/TestUtils.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> size_shape, + std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data, + std::initializer_list<int32_t> size_data, std::initializer_list<float> output_data, + bool align_corners, bool half_pixel_centers) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ResizeBilinearParams params{}; + params.align_corners = align_corners; + params.half_pixel_centers = half_pixel_centers; + + ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data)); +} + +template <> +void Check<uint8_t>(std::initializer_list<int32_t> input_shape, + std::initializer_list<int32_t> size_shape, + std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, + std::initializer_list<int32_t> size_data, + std::initializer_list<float> output_data, bool align_corners, + bool half_pixel_centers) +{ + // On TFlite example use Uint8 value it self, so this means quant param scale 1.0f and zero + // point 0. + Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, 1.0, 0, input_data); + Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data); + Tensor output_tensor = makeOutputTensor(DataType::U8, 1.0, 0); + + ResizeBilinearParams params{}; + params.align_corners = align_corners; + params.half_pixel_centers = half_pixel_centers; + + ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, output_tensor.scale())); +} + +template <typename T> class ResizeBilinearTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t>; +TYPED_TEST_CASE(ResizeBilinearTest, DataTypes); + +TYPED_TEST(ResizeBilinearTest, SimpleTest) +{ + Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + {3, 3}, + { + 3, 5, 6, // + 7, 9, 10, // + 9, 11, 12, // + 4, 8, 10, // + 8, 12, 14, // + 10, 14, 16, // + }, + false, false); + SUCCEED(); +} + +TEST(ResizeBilinearTest, HalfPixelCenterFloatTest) +{ + Check<float>({2, 2, 2, 1}, {2}, {2, 3, 3, 1}, + { + 1, 2, // + 3, 4, // + 1, 2, // + 3, 4 // + }, + {3, 3}, + { + 1, 1.5, 2, // + 2, 2.5, 3, // + 3, 3.5, 4, // + 1, 1.5, 2, // + 2, 2.5, 3, // + 3, 3.5, 4, // + }, + false, true); + SUCCEED(); +} + +TEST(ResizeBilinearTest, HalfPixelCenterUint8Test) +{ + Check<uint8_t>({2, 2, 2, 1}, {2}, {2, 3, 3, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 12, 16 // + }, + {3, 3}, + { + 2, 4, 6, // + 6, 7, 9, // + 9, 10, 12, // + 4, 7, 10, // + 8, 10, 13, // + 12, 14, 16, // + }, + false, true); + SUCCEED(); +} + +TEST(ResizeBilinearTest, InputShapeInvalid_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2}, { + 3, 6, // + 9, 
12, // + 4, 10, // + 10, 16 // + }); + Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ResizeBilinearParams params{}; + params.align_corners = false; + params.half_pixel_centers = false; + + ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(ResizeBilinearTest, SizeShapeInvalid_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }); + Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3}); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ResizeBilinearParams params{}; + params.align_corners = false; + params.half_pixel_centers = false; + + ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(ResizeBilinearTest, SizeDimInvalid_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }); + Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1}); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ResizeBilinearParams params{}; + params.align_corners = false; + params.half_pixel_centers = false; + + ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(ResizeBilinearTest, InvalidParams_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }); + Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ResizeBilinearParams params{}; + params.align_corners = true; + params.half_pixel_centers = true; + + ResizeBilinear kernel(&input_tensor, &size_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp new file mode 100644 index 000000000..e4ad8f742 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "kernels/ResizeNearestNeighbor.h" + +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/reference/reference_ops.h> +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> + +namespace luci_interpreter +{ +namespace kernels +{ + +ResizeNearestNeighbor::ResizeNearestNeighbor(const Tensor *input, const Tensor *size, + Tensor *output, + const ResizeNearestNeighborParams &params) + : KernelWithParams<ResizeNearestNeighborParams>({input, size}, {output}, params) +{ +} + +void ResizeNearestNeighbor::configure() +{ + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() == 4); + LUCI_INTERPRETER_CHECK(size()->shape().num_dims() == 1); + LUCI_INTERPRETER_CHECK(size()->element_type() == DataType::S32); + LUCI_INTERPRETER_CHECK(size()->shape().dim(0) == 2); + Shape output_shape(4); + output_shape.dim(0) = input()->shape().dim(0); + output_shape.dim(1) = getTensorData<int32_t>(size())[0]; + output_shape.dim(2) = getTensorData<int32_t>(size())[1]; + output_shape.dim(3) = input()->shape().dim(3); + output()->resize(output_shape); +} + +void ResizeNearestNeighbor::execute() const +{ + tflite::ResizeNearestNeighborParams op_params{}; + op_params.align_corners = params().align_corners; + op_params.half_pixel_centers = params().half_pixel_centers; + switch (output()->element_type()) + { + case DataType::FLOAT32: + tflite::reference_ops::ResizeNearestNeighbor( + op_params, getTensorShape(input()), getTensorData<int32_t>(input()), + getTensorShape(size()), getTensorData<int32_t>(size()), getTensorShape(output()), + getTensorData<int32_t>(output())); + break; + case DataType::U8: + tflite::optimized_ops::ResizeNearestNeighbor( + op_params, getTensorShape(input()), getTensorData<uint8_t>(input()), + getTensorShape(size()), getTensorData<int32_t>(size()), getTensorShape(output()), + getTensorData<uint8_t>(output())); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.h b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.h new file mode 100644 index 000000000..137d031cf --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H +#define LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class ResizeNearestNeighbor : public KernelWithParams<ResizeNearestNeighborParams> +{ +public: + ResizeNearestNeighbor(const Tensor *input, const Tensor *shape, Tensor *output, + const ResizeNearestNeighborParams ¶ms); + + const Tensor *input() const { return _inputs[0]; } + const Tensor *size() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_RESIZENEARESTNEIGHBOR_H diff --git a/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp new file mode 100644 index 000000000..9a804cca7 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/ResizeNearestNeighbor.test.cpp @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/ResizeNearestNeighbor.h" +#include "kernels/TestUtils.h" + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; + +template <typename T> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> size_shape, + std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data, + std::initializer_list<int32_t> size_data, std::initializer_list<float> output_data, + bool align_corners, bool half_pixel_centers) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ResizeNearestNeighborParams params{}; + params.align_corners = align_corners; + params.half_pixel_centers = half_pixel_centers; + + ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data)); +} + +template <> +void Check<uint8_t>(std::initializer_list<int32_t> input_shape, + std::initializer_list<int32_t> size_shape, + std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, + std::initializer_list<int32_t> size_data, + std::initializer_list<float> output_data, bool align_corners, + bool half_pixel_centers) +{ + std::pair<float, int32_t> quant_param = + quantizationParams<uint8_t>(std::min(input_data) < 0 ? std::min(input_data) : 0.f, + std::max(input_data) > 0 ? 
std::max(input_data) : 0.f); + Tensor input_tensor = + makeInputTensor<DataType::U8>(input_shape, quant_param.first, quant_param.second, input_data); + Tensor size_tensor = makeInputTensor<DataType::S32>(size_shape, size_data); + Tensor output_tensor = makeOutputTensor(DataType::U8, quant_param.first, quant_param.first); + + ResizeNearestNeighborParams params{}; + params.align_corners = align_corners; + params.half_pixel_centers = half_pixel_centers; + + ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, output_tensor.scale())); +} + +template <typename T> class ResizeNearestNeighborTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t>; +TYPED_TEST_CASE(ResizeNearestNeighborTest, DataTypes); + +TYPED_TEST(ResizeNearestNeighborTest, SimpleTest) +{ + Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + {3, 3}, + { + 3, 3, 6, // + 3, 3, 6, // + 9, 9, 12, // + 4, 4, 10, // + 4, 4, 10, // + 10, 10, 16, // + }, + false, false); +} + +TYPED_TEST(ResizeNearestNeighborTest, AlignCenterTest) +{ + Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + {3, 3}, + { + 3, 6, 6, // + 9, 12, 12, // + 9, 12, 12, // + 4, 10, 10, // + 10, 16, 16, // + 10, 16, 16, // + }, + true, false); +} + +TYPED_TEST(ResizeNearestNeighborTest, HalfPixelCenterTest) +{ + Check<TypeParam>({2, 2, 2, 1}, {2}, {2, 3, 3, 1}, + { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }, + {3, 3}, + { + 3, 6, 6, // + 9, 12, 12, // + 9, 12, 12, // + 4, 10, 10, // + 10, 16, 16, // + 10, 16, 16, // + }, + false, true); +} + +TEST(ResizeNearestNeighborTest, InputShapeInvalid_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2}, { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }); + Tensor size_tensor = makeInputTensor<DataType::S32>({2}, {3, 3}); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ResizeNearestNeighborParams params{}; + params.align_corners = false; + params.half_pixel_centers = false; + + ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(ResizeNearestNeighborTest, SizeShapeInvalid_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }); + Tensor size_tensor = makeInputTensor<DataType::S32>({2, 1}, {3, 3}); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ResizeNearestNeighborParams params{}; + params.align_corners = false; + params.half_pixel_centers = false; + + ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(ResizeNearestNeighborTest, SizeDimInvalid_NEG) +{ + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>({2, 2, 2, 1}, { + 3, 6, // + 9, 12, // + 4, 10, // + 10, 16 // + }); + Tensor size_tensor = makeInputTensor<DataType::S32>({3}, {3, 3, 1}); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + ResizeNearestNeighborParams params{}; + params.align_corners = false; + params.half_pixel_centers = false; + + ResizeNearestNeighbor kernel(&input_tensor, &size_tensor, &output_tensor, params); + 
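  // configure() must reject this: the size tensor has to be a 1-D tensor with exactly two
  // elements (new height, new width), but this negative test feeds three.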
EXPECT_ANY_THROW(kernel.configure()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp b/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp index 69b55d2f2..d33b800be 100644 --- a/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Rsqrt.test.cpp @@ -29,17 +29,14 @@ using namespace testing; void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data, std::initializer_list<float> output_data) { - Tensor input_tensor{DataType::FLOAT32, input_shape, {}, ""}; - input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(float)); - + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Rsqrt kernel(&input_tensor, &output_tensor); kernel.configure(); kernel.execute(); - EXPECT_THAT(extractTensorData<float>(output_tensor), - ::testing::ElementsAreArray(ArrayFloatNear(output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); } diff --git a/compiler/luci-interpreter/src/kernels/Softmax.cpp b/compiler/luci-interpreter/src/kernels/Softmax.cpp index 2fb7f3f2c..642c0ad75 100644 --- a/compiler/luci-interpreter/src/kernels/Softmax.cpp +++ b/compiler/luci-interpreter/src/kernels/Softmax.cpp @@ -19,6 +19,7 @@ #include "kernels/Utils.h" #include <tensorflow/lite/kernels/internal/reference/softmax.h> +#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h> #include <stdexcept> @@ -35,7 +36,15 @@ Softmax::Softmax(const Tensor *input, Tensor *output, const SoftmaxParams ¶m void Softmax::configure() { - assert(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(input()->element_type() == output()->element_type()); + LUCI_INTERPRETER_CHECK(input()->shape().num_dims() >= 1); + if (input()->element_type() == DataType::U8 || input()->element_type() == DataType::S8) + { + LUCI_INTERPRETER_CHECK(output()->zero_point() == 0); + tflite::SoftmaxParams op_params{}; + op_params.table = _table; + tflite::optimized_ops::PopulateSoftmaxLookupTable(&op_params, input()->scale(), params().beta); + } output()->resize(input()->shape()); } @@ -46,6 +55,12 @@ void Softmax::execute() const case DataType::FLOAT32: evalFloat(); break; + case DataType::S8: + evalQuantized<int8_t>(); + break; + case DataType::U8: + evalQuantized<uint8_t>(); + break; default: throw std::runtime_error("Unsupported type."); } @@ -53,12 +68,23 @@ void Softmax::execute() const void Softmax::evalFloat() const { - tflite::SoftmaxParams params{}; - params.beta = _params.beta; + tflite::SoftmaxParams op_params{}; + op_params.beta = params().beta; - tflite::reference_ops::Softmax(params, getTensorShape(input()), getTensorData<float>(input()), + tflite::reference_ops::Softmax(op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(output()), getTensorData<float>(output())); } +template <typename T> void Softmax::evalQuantized() const +{ + tflite::SoftmaxParams op_params{}; + op_params.table = const_cast<float *>(_table); + op_params.zero_point = output()->zero_point(); + op_params.scale = output()->scale(); + + tflite::optimized_ops::Softmax(op_params, getTensorShape(input()), getTensorData<T>(input()), + getTensorShape(output()), 
getTensorData<T>(output())); +} + } // namespace kernels } // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Softmax.h b/compiler/luci-interpreter/src/kernels/Softmax.h index 2e4eda492..1f281df1c 100644 --- a/compiler/luci-interpreter/src/kernels/Softmax.h +++ b/compiler/luci-interpreter/src/kernels/Softmax.h @@ -38,6 +38,9 @@ public: private: void evalFloat() const; + template <typename T> void evalQuantized() const; + + float _table[256]; }; } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/Softmax.test.cpp b/compiler/luci-interpreter/src/kernels/Softmax.test.cpp index 2193c3e83..d3d8209a5 100644 --- a/compiler/luci-interpreter/src/kernels/Softmax.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Softmax.test.cpp @@ -26,15 +26,10 @@ namespace using namespace testing; -TEST(SoftmaxTest, Float) +template <typename T> +void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, std::initializer_list<float> output_data) { - Shape input_shape{2, 1, 2, 3}; - std::vector<float> input_data{ - 5, -9, 8, // - -7, 2, -4, // - 1, -2, 9, // - 3, -6, -1, // - }; Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); @@ -45,14 +40,61 @@ TEST(SoftmaxTest, Float) kernel.configure(); kernel.execute(); - std::vector<float> ref_output_data{ - 0.38514, 0.09497, 0.51989, // - 0.20792, 0.51141, 0.28067, // - 0.25212, 0.18678, 0.56110, // - 0.48149, 0.19576, 0.32275, // - }; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<T>(output_tensor), FloatArrayNear(output_data)); + EXPECT_THAT(extractTensorShape(output_tensor), output_shape); +} + +template <> +void Check<uint8_t>(std::initializer_list<int32_t> input_shape, + std::initializer_list<int32_t> output_shape, + std::initializer_list<float> input_data, + std::initializer_list<float> output_data) +{ + std::pair<float, int32_t> input_quant_param = + quantizationParams<uint8_t>(std::min<float>(std::min<float>(input_data), 0.f), + std::max<float>(std::max<float>(input_data), 0.f)); + std::pair<float, int32_t> output_quant_param = + quantizationParams<uint8_t>(std::min<float>(std::min<float>(output_data), 0.f), + std::max<float>(std::max<float>(output_data), 0.f)); + Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, input_quant_param.first, + input_quant_param.second, input_data); + Tensor output_tensor = + makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); + + SoftmaxParams params{}; + params.beta = 0.1; + + Softmax kernel(&input_tensor, &output_tensor, params); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data, output_tensor.scale())); +} + +template <typename T> class SoftmaxTest : public ::testing::Test +{ +}; + +using DataTypes = ::testing::Types<float, uint8_t>; +TYPED_TEST_CASE(SoftmaxTest, DataTypes); + +TYPED_TEST(SoftmaxTest, Simple) +{ + Check<TypeParam>({2, 1, 2, 3}, {2, 1, 2, 3}, + { + 5, -9, 8, // + -7, 2, -4, // + 1, -2, 9, // + 3, -6, -1, // + }, + { + 0.38514, 0.09497, 0.51989, // + 0.20792, 0.51141, 0.28067, // + 0.25212, 0.18678, 0.56110, // + 0.48149, 0.19576, 0.32275, // + }); } } // namespace diff --git 
a/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp b/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp index e4a0fd642..77b6655dc 100644 --- a/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp +++ b/compiler/luci-interpreter/src/kernels/SpaceToDepth.test.cpp @@ -35,13 +35,13 @@ TYPED_TEST_CASE(SpaceToDepthTest, DataTypes); TYPED_TEST(SpaceToDepthTest, SimpleCase) { + constexpr DataType element_type = getElementType<TypeParam>(); std::vector<TypeParam> input_data{1, 5, 6, 7, 2, 3, 4, 8}; Shape input_shape{1, 2, 2, 2}; - Tensor input_tensor{getElementType<TypeParam>(), input_shape, {{}, {}}, ""}; - input_tensor.writeData(input_data.data(), input_data.size() * sizeof(TypeParam)); + Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data); std::vector<TypeParam> output_data{1, 5, 6, 7, 2, 3, 4, 8}; std::vector<int32_t> output_shape{1, 1, 1, 8}; - Tensor output_tensor = makeOutputTensor(getElementType<TypeParam>()); + Tensor output_tensor = makeOutputTensor(element_type); SpaceToDepthParams params{}; params.block_size = 2; diff --git a/compiler/luci-interpreter/src/kernels/Split.test.cpp b/compiler/luci-interpreter/src/kernels/Split.test.cpp index 11d0b1ea9..2147d15c1 100644 --- a/compiler/luci-interpreter/src/kernels/Split.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Split.test.cpp @@ -30,11 +30,11 @@ using namespace testing; template <typename T> void Check(int axis, int num_splits, std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data, - std::vector<std::vector<T>> output_data, DataType element_type) + std::vector<std::vector<T>> output_data) { + constexpr DataType element_type = getElementType<T>(); Tensor axis_tensor = makeInputTensor<DataType::S32>({}, {axis}); - Tensor input_tensor{element_type, input_shape, {}, ""}; - input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(T)); + Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data); std::vector<Tensor> output_tensors; output_tensors.reserve(num_splits); @@ -74,51 +74,42 @@ TYPED_TEST(SplitTest, FourDimensional) { {1, 2, 3, 4, 5, 6, 7, 8}, // {9, 10, 11, 12, 13, 14, 15, 16}, // - }, - getElementType<TypeParam>()); + }); Check<TypeParam>( /*axis=*/1, /*num_splits=*/2, {2, 2, 2, 2}, {2, 1, 2, 2}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, - { - {1, 2, 3, 4, 9, 10, 11, 12}, // - {5, 6, 7, 8, 13, 14, 15, 16}, // - }, - getElementType<TypeParam>()); + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, { + {1, 2, 3, 4, 9, 10, 11, 12}, // + {5, 6, 7, 8, 13, 14, 15, 16}, // + }); Check<TypeParam>( /*axis=*/2, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 1, 2}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, - { - {1, 2, 5, 6, 9, 10, 13, 14}, // - {3, 4, 7, 8, 11, 12, 15, 16}, // - }, - getElementType<TypeParam>()); + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, { + {1, 2, 5, 6, 9, 10, 13, 14}, // + {3, 4, 7, 8, 11, 12, 15, 16}, // + }); Check<TypeParam>( /*axis=*/3, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 2, 1}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, - { - {1, 3, 5, 7, 9, 11, 13, 15}, // - {2, 4, 6, 8, 10, 12, 14, 16}, // - }, - getElementType<TypeParam>()); + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, { + {1, 3, 5, 7, 9, 11, 13, 15}, // + {2, 4, 6, 8, 10, 12, 14, 16}, // + }); } TYPED_TEST(SplitTest, OneDimensional) { Check<TypeParam>( /*axis=*/0, /*num_splits=*/8, {8}, {1}, {1, 2, 3, 4, 5, 6, 7, 8}, - {{1}, 
{2}, {3}, {4}, {5}, {6}, {7}, {8}}, getElementType<TypeParam>()); + {{1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}}); } TYPED_TEST(SplitTest, NegativeAxis) { Check<TypeParam>( /*axis=*/-4, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, - { - {1, 2, 3, 4, 5, 6, 7, 8}, // - {9, 10, 11, 12, 13, 14, 15, 16}, - }, - getElementType<TypeParam>()); + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, { + {1, 2, 3, 4, 5, 6, 7, 8}, // + {9, 10, 11, 12, 13, 14, 15, 16}, + }); } } // namespace diff --git a/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp b/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp index cdd208280..504db4493 100644 --- a/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Sqrt.test.cpp @@ -29,17 +29,14 @@ using namespace testing; void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, std::initializer_list<float> input_data, std::initializer_list<float> output_data) { - Tensor input_tensor{DataType::FLOAT32, input_shape, {}, ""}; - input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(float)); - + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); Sqrt kernel(&input_tensor, &output_tensor); kernel.configure(); kernel.execute(); - EXPECT_THAT(extractTensorData<float>(output_tensor), - ::testing::ElementsAreArray(ArrayFloatNear(output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); } diff --git a/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp b/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp index 3a34284dd..ff9fb09d2 100644 --- a/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Squeeze.test.cpp @@ -29,17 +29,14 @@ using namespace testing; template <typename T> void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data, std::initializer_list<T> output_data, - DataType element_type, std::vector<int32_t> squeeze_dims) + std::initializer_list<int32_t> squeeze_dims) { - Tensor input_tensor{element_type, input_shape, {}, ""}; - input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(T)); + constexpr DataType element_type = getElementType<T>(); + Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data); Tensor output_tensor = makeOutputTensor(element_type); SqueezeParams params{}; - for (size_t i = 0; i < squeeze_dims.size(); i++) - { - params.squeeze_dims.push_back(squeeze_dims.at(i)); - } + params.squeeze_dims = squeeze_dims; Squeeze kernel(&input_tensor, &output_tensor, params); kernel.configure(); @@ -64,7 +61,7 @@ TYPED_TEST(SqueezeTest, TotalTest) 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}, /*output_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}, - getElementType<TypeParam>(), {-1, 0}); + {-1, 0}); } } // namespace diff --git a/compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp b/compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp index 5ab06e2ec..66dffcaf2 100644 --- a/compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp +++ b/compiler/luci-interpreter/src/kernels/StridedSlice.test.cpp @@ -36,17 +36,12 @@ TEST(StridedSliceTest, Float) 
std::vector<int32_t> end_data{1, 3, 2}; Shape strides_shape{3}; std::vector<int32_t> strides_data{1, 1, 1}; - Tensor input_tensor{DataType::FLOAT32, input_shape, {}, ""}; - Tensor begin_tensor{DataType::S32, begin_shape, {}, ""}; - Tensor end_tensor{DataType::S32, end_shape, {}, ""}; - Tensor strides_tensor{DataType::S32, strides_shape, {}, ""}; + Tensor input_tensor = makeInputTensor<DataType::FLOAT32>(input_shape, input_data); + Tensor begin_tensor = makeInputTensor<DataType::S32>(begin_shape, begin_data); + Tensor end_tensor = makeInputTensor<DataType::S32>(end_shape, end_data); + Tensor strides_tensor = makeInputTensor<DataType::S32>(strides_shape, strides_data); Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); - input_tensor.writeData(input_data.data(), input_data.size() * sizeof(float)); - begin_tensor.writeData(begin_data.data(), begin_data.size() * sizeof(int32_t)); - end_tensor.writeData(end_data.data(), end_data.size() * sizeof(int32_t)); - strides_tensor.writeData(strides_data.data(), strides_data.size() * sizeof(int32_t)); - StridedSliceParams params{}; params.begin_mask = 0; params.end_mask = 0; @@ -61,8 +56,7 @@ TEST(StridedSliceTest, Float) std::vector<int32_t> output_shape{3, 2}; std::vector<float> output_data{1, 2, 3, 4, 5, 6}; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); } @@ -70,24 +64,18 @@ TEST(StridedSliceTest, Uint8) { Shape input_shape{2, 3, 2}; std::vector<float> input_data{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; - std::vector<uint8_t> quant_input_data = quantize<uint8_t>(input_data, 1.0f, 0); Shape begin_shape{3}; std::vector<int32_t> begin_data{0, 0, 0}; Shape end_shape{3}; std::vector<int32_t> end_data{1, 3, 2}; Shape strides_shape{3}; std::vector<int32_t> strides_data{1, 1, 1}; - Tensor input_tensor{DataType::U8, input_shape, {{1.0f}, {0}}, ""}; - Tensor begin_tensor{DataType::S32, begin_shape, {}, ""}; - Tensor end_tensor{DataType::S32, end_shape, {}, ""}; - Tensor strides_tensor{DataType::S32, strides_shape, {}, ""}; + Tensor input_tensor = makeInputTensor<DataType::U8>(input_shape, 1.0f, 0, input_data); + Tensor begin_tensor = makeInputTensor<DataType::S32>(begin_shape, begin_data); + Tensor end_tensor = makeInputTensor<DataType::S32>(end_shape, end_data); + Tensor strides_tensor = makeInputTensor<DataType::S32>(strides_shape, strides_data); Tensor output_tensor = makeOutputTensor(DataType::U8, 1.0f, 0); - input_tensor.writeData(quant_input_data.data(), quant_input_data.size() * sizeof(uint8_t)); - begin_tensor.writeData(begin_data.data(), begin_data.size() * sizeof(int32_t)); - end_tensor.writeData(end_data.data(), end_data.size() * sizeof(int32_t)); - strides_tensor.writeData(strides_data.data(), strides_data.size() * sizeof(int32_t)); - StridedSliceParams params{}; params.begin_mask = 0; params.end_mask = 0; @@ -102,9 +90,7 @@ TEST(StridedSliceTest, Uint8) std::vector<int32_t> output_shape{3, 2}; std::vector<float> output_data{1, 2, 3, 4, 5, 6}; - EXPECT_THAT(dequantize(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(), - output_tensor.zero_point()), - ElementsAreArray(ArrayFloatNear(output_data))); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(output_data)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape)); } diff --git 
a/compiler/luci-interpreter/src/kernels/Sub.cpp b/compiler/luci-interpreter/src/kernels/Sub.cpp new file mode 100644 index 000000000..dd9c1102f --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Sub.cpp @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Sub.h" +#include "kernels/Utils.h" + +#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h> + +#include <stdexcept> + +namespace luci_interpreter +{ +namespace kernels +{ + +Sub::Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubParams ¶ms) + : KernelWithParams<SubParams>({input1, input2}, {output}, params) +{ +} + +void Sub::configure() +{ + LUCI_INTERPRETER_CHECK(!(input1()->element_type() != input2()->element_type())) + output()->resize(calculateShapeForBroadcast(input1()->shape(), input2()->shape())); +} + +void Sub::execute() const +{ + switch (input1()->element_type()) + { + case DataType::FLOAT32: + evalFloat(); + break; + case DataType::U8: + evalQuantized(); + break; + default: + throw std::runtime_error("Unsupported type."); + } +} + +void Sub::evalFloat() const +{ + float activation_min{}; + float activation_max{}; + calculateActivationRange(_params.activation, &activation_min, &activation_max); + + tflite::ArithmeticParams params{}; + params.float_activation_min = activation_min; + params.float_activation_max = activation_max; + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastSubSlow( + params, getTensorShape(input1()), getTensorData<float>(input1()), getTensorShape(input2()), + getTensorData<float>(input2()), getTensorShape(output()), getTensorData<float>(output())); + } + else + { + tflite::optimized_ops::Sub(params, getTensorShape(input1()), getTensorData<float>(input1()), + getTensorShape(input2()), getTensorData<float>(input2()), + getTensorShape(output()), getTensorData<float>(output())); + } +} + +void Sub::evalQuantized() const +{ + const auto input1_scale = static_cast<double>(input1()->scale()); + const auto input2_scale = static_cast<double>(input2()->scale()); + const auto output_scale = static_cast<double>(output()->scale()); + + const int left_shift = 20; + const double twice_max_input_scale = 2 * std::max(input1_scale, input2_scale); + const double real_input1_multiplier = input1_scale / twice_max_input_scale; + const double real_input2_multiplier = input2_scale / twice_max_input_scale; + const double real_output_multiplier = twice_max_input_scale / ((1 << left_shift) * output_scale); + + int32_t input1_multiplier{}, input2_multiplier{}, output_multiplier{}; + int input1_shift{}, input2_shift{}, output_shift{}; + quantizeMultiplierSmallerThanOneExp(real_input1_multiplier, &input1_multiplier, &input1_shift); + 
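  // Each input is rescaled to a common scale (twice the larger of the two input scales);
  // the output multiplier then divides the (1 << left_shift) headroom back out, so the
  // whole subtraction runs in 32-bit fixed-point arithmetic.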
quantizeMultiplierSmallerThanOneExp(real_input2_multiplier, &input2_multiplier, &input2_shift); + quantizeMultiplierSmallerThanOneExp(real_output_multiplier, &output_multiplier, &output_shift); + + int32_t activation_min{}; + int32_t activation_max{}; + calculateActivationRangeQuantized(_params.activation, output(), &activation_min, &activation_max); + + tflite::ArithmeticParams params{}; + params.left_shift = left_shift; + // The kernel expects inputs' zero points to be negated. + params.input1_offset = -input1()->zero_point(); // Note the '-'. + params.input1_multiplier = input1_multiplier; + params.input1_shift = input1_shift; + params.input2_offset = -input2()->zero_point(); // Note the '-'. + params.input2_multiplier = input2_multiplier; + params.input2_shift = input2_shift; + params.output_offset = output()->zero_point(); + params.output_multiplier = output_multiplier; + params.output_shift = output_shift; + params.quantized_activation_min = activation_min; + params.quantized_activation_max = activation_max; + + const bool need_broadcast = tflite::reference_ops::ProcessBroadcastShapes( + getTensorShape(input1()), getTensorShape(input2()), ¶ms); + + if (need_broadcast) + { + tflite::reference_ops::BroadcastSubSlow( + params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), + getTensorShape(input2()), getTensorData<uint8_t>(input2()), getTensorShape(output()), + getTensorData<uint8_t>(output())); + } + else + { + tflite::reference_ops::Sub(params, getTensorShape(input1()), getTensorData<uint8_t>(input1()), + getTensorShape(input2()), getTensorData<uint8_t>(input2()), + getTensorShape(output()), getTensorData<uint8_t>(output())); + } +} + +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Sub.h b/compiler/luci-interpreter/src/kernels/Sub.h new file mode 100644 index 000000000..d7940b5c6 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Sub.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LUCI_INTERPRETER_KERNELS_SUB_H +#define LUCI_INTERPRETER_KERNELS_SUB_H + +#include "core/Kernel.h" +#include "core/KernelParams.h" + +namespace luci_interpreter +{ +namespace kernels +{ + +class Sub : public KernelWithParams<SubParams> +{ +public: + Sub(const Tensor *input1, const Tensor *input2, Tensor *output, const SubParams ¶ms); + + const Tensor *input1() const { return _inputs[0]; } + const Tensor *input2() const { return _inputs[1]; } + Tensor *output() const { return _outputs[0]; } + + void configure() override; + void execute() const override; + +private: + void evalFloat() const; + void evalQuantized() const; +}; + +} // namespace kernels +} // namespace luci_interpreter + +#endif // LUCI_INTERPRETER_KERNELS_SUB_H diff --git a/compiler/luci-interpreter/src/kernels/Sub.test.cpp b/compiler/luci-interpreter/src/kernels/Sub.test.cpp new file mode 100644 index 000000000..9f77fe7e0 --- /dev/null +++ b/compiler/luci-interpreter/src/kernels/Sub.test.cpp @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2017 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kernels/Sub.h" +#include "kernels/TestUtils.h" + +#include <algorithm> + +namespace luci_interpreter +{ +namespace kernels +{ +namespace +{ + +using namespace testing; +using std::pair; +using std::vector; +using std::transform; +using std::initializer_list; + +// for quantized Add, the error shouldn't exceed step +float GetTolerance(float min, float max) +{ + float kQuantizedStep = (max - min) / 255.0; + return kQuantizedStep; +} + +TEST(SubTest, Uint8) +{ + Shape base_shape = {2, 3, 1, 2}; + vector<float> base_data = {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, + 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f}; + vector<Shape> test_shapes = {{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + vector<float> test_data = {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; + vector<vector<int32_t>> output_shapes = {{2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}}; + vector<vector<float>> output_data = { + {-0.5f, 2.0f, 0.1f, 1.8f, -1.3f, 1.4f, 0.7f, 0.2f, 1.3f, 0.0f, -0.1f, -0.4f, + 0.6f, -1.4f, 1.2f, -1.6f, -0.2f, -2.0f, 1.0f, 2.5f, 1.6f, 2.3f, 0.2f, 1.9f, + -1.8f, -0.3f, -1.2f, -0.5f, -2.6f, -0.9f, 0.5f, -2.5f, 1.1f, -2.7f, -0.3f, -3.0f}, + {-0.5f, 2.0f, 1.3f, 0.0f, -0.2f, -2.0f, 1.0f, 2.5f, -1.2f, -0.5f, -0.3f, -3.0f}, + {-0.5f, 2.1f, -0.6f, 2.0f, 0.1f, 2.7f, 0.7f, 0.3f, 0.6f, 0.2f, 1.3f, 0.9f, + 0.6f, -1.3f, 0.5f, -1.4f, 1.2f, -0.7f, 0.7f, 2.3f, 0.2f, 1.8f, 0.3f, 1.9f, + -2.1f, -0.5f, -2.6f, -1.0f, -2.5f, -0.9f, 0.2f, -2.7f, -0.3f, -3.0f, -0.2f, -3.0f}, + {-0.5f, 2.1f, 0.6f, 0.2f, 1.2f, -0.7f, 0.7f, 2.3f, -2.6f, -1.0f, -0.2f, -3.0f}}; + + float kQuantizedTolerance = GetTolerance(-3.f, 3.f); + pair<float, int32_t> quant_param = quantizationParams<uint8_t>(-3.f, 3.f); + for (size_t i = 0; i < output_data.size(); ++i) + { + Tensor input1_tensor = + makeInputTensor<DataType::U8>(base_shape, quant_param.first, 
quant_param.second, base_data); + Tensor input2_tensor = makeInputTensor<DataType::U8>(test_shapes[i], quant_param.first, + quant_param.second, test_data); + Tensor output_tensor = + makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second); + + SubParams params{}; + params.activation = Activation::NONE; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data[i], kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i])); + } + + // Inversion step for output_data, because subtract is not commutative operation + auto multiply = [](auto &i) { + transform(i.begin(), i.end(), i.begin(), [](auto &value) { return value * -1.0f; }); + }; + for_each(output_data.begin(), output_data.end(), multiply); + + // Re-run with exchanged inputs. + for (size_t i = 0; i < output_data.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DataType::U8>(test_shapes[i], quant_param.first, + quant_param.second, test_data); + Tensor input2_tensor = + makeInputTensor<DataType::U8>(base_shape, quant_param.first, quant_param.second, base_data); + Tensor output_tensor = + makeOutputTensor(getElementType<uint8_t>(), quant_param.first, quant_param.second); + + SubParams params{}; + params.activation = Activation::NONE; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(dequantizeTensorData(output_tensor), + FloatArrayNear(output_data[i], kQuantizedTolerance)); + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i])); + } +} + +TEST(SubTest, Float) +{ + Shape base_shape = {2, 3, 1, 2}; + vector<Shape> test_shapes{{1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}}; + vector<vector<int32_t>> output_shapes{{2, 3, 3, 2}, {2, 3, 1, 2}, {2, 3, 3, 2}, {2, 3, 1, 2}}; + vector<vector<float>> test_outputs = { + {0.0f, 2.0f, 0.1f, 1.8f, 0.0f, 1.4f, 0.7f, 0.2f, 1.3f, 0.0f, 0.0f, 0.0f, + 0.6f, 0.0f, 1.2f, 0.0f, 0.0f, 0.0f, 1.0f, 2.5f, 1.6f, 2.3f, 0.2f, 1.9f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.5f, 0.0f, 1.1f, 0.0f, 0.0f, 0.0f}, + {0.0f, 2.0f, 1.3f, 0.0f, 0.0f, 0.0f, 1.0f, 2.5f, 0.0f, 0.0f, 0.0f, 0.0f}, + {0.0f, 2.1f, 0.0f, 2.0f, 0.1f, 2.7f, 0.7f, 0.3f, 0.6f, 0.2f, 1.3f, 0.9f, + 0.6f, 0.0f, 0.5f, 0.0f, 1.2f, 0.0f, 0.7f, 2.3f, 0.2f, 1.8f, 0.3f, 1.9f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.2f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, + {0.0f, 2.1f, 0.6f, 0.2f, 1.2f, 0.0f, 0.7f, 2.3f, 0.0f, 0.0f, 0.0f, 0.0f}}; + + vector<float> input1_data{-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, + 1.2f, 2.8f, -1.6f, 0.0f, 0.7f, -2.2f}; + vector<float> input2_data{0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f}; + for (size_t i = 0; i < test_shapes.size(); ++i) + { + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>(base_shape, input1_data); + Tensor input2_tensor = makeInputTensor<DataType::FLOAT32>(test_shapes[i], input2_data); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + SubParams params{}; + params.activation = Activation::RELU; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + kernel.execute(); + + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(test_outputs[i], 0.0001f)) + << "With shape number " << i; + + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shapes[i])); + } +} + +TEST(SubTest, 
Input_Output_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::FLOAT32>({1}, {1.f}); + Tensor input2_tensor = makeInputTensor<DataType::S32>({1}, {2}); + Tensor output_tensor = makeOutputTensor(DataType::FLOAT32); + + SubParams params{}; + params.activation = Activation::RELU; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + EXPECT_ANY_THROW(kernel.configure()); +} + +TEST(SubTest, Invalid_Input_Type_NEG) +{ + Tensor input1_tensor = makeInputTensor<DataType::S64>({1}, {1}); + Tensor input2_tensor = makeInputTensor<DataType::S64>({1}, {2}); + Tensor output_tensor = makeOutputTensor(DataType::S64); + + SubParams params{}; + params.activation = Activation::RELU; + + Sub kernel(&input1_tensor, &input2_tensor, &output_tensor, params); + kernel.configure(); + EXPECT_ANY_THROW(kernel.execute()); +} + +} // namespace +} // namespace kernels +} // namespace luci_interpreter diff --git a/compiler/luci-interpreter/src/kernels/Tanh.test.cpp b/compiler/luci-interpreter/src/kernels/Tanh.test.cpp index 392b8672d..f91ffa1db 100644 --- a/compiler/luci-interpreter/src/kernels/Tanh.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Tanh.test.cpp @@ -45,8 +45,7 @@ TEST(TanhTest, Float) 0, -0.9999877, 0.9640275, 0.999329, // 0.99505475, -0.9640275, 1, 0.7615941, // }; - EXPECT_THAT(extractTensorData<float>(output_tensor), - ElementsAreArray(ArrayFloatNear(ref_output_data))); + EXPECT_THAT(extractTensorData<float>(output_tensor), FloatArrayNear(ref_output_data)); } TEST(TanhTest, Uint8) @@ -70,13 +69,10 @@ TEST(TanhTest, Uint8) 0, -6, 2, 4, // -4, -2, 8, 1, // }; - Tensor input_tensor{ - DataType::U8, {2, 6, 4, 1}, {{input_quant_param.first}, {input_quant_param.second}}, ""}; + Tensor input_tensor = makeInputTensor<DataType::U8>({2, 6, 4, 1}, input_quant_param.first, + input_quant_param.second, input_data); Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant_param.first, output_quant_param.second); - std::vector<uint8_t> quantize_input = - quantize<uint8_t>(input_data, input_quant_param.first, input_quant_param.second); - input_tensor.writeData(quantize_input.data(), quantize_input.size() * sizeof(uint8_t)); Tanh kernel(&input_tensor, &output_tensor); kernel.configure(); @@ -97,9 +93,7 @@ TEST(TanhTest, Uint8) -0.999329, -0.96402, 0.99999, 0.76159, // }; std::vector<int32_t> ref_output_shape{2, 6, 4, 1}; - EXPECT_THAT(dequantize<uint8_t>(extractTensorData<uint8_t>(output_tensor), output_tensor.scale(), - output_tensor.zero_point()), - ElementsAreArray(ArrayFloatNear(ref_output_data, kTanhTolerance))); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data, kTanhTolerance)); EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(ref_output_shape)); } diff --git a/compiler/luci-interpreter/src/kernels/TestUtils.cpp b/compiler/luci-interpreter/src/kernels/TestUtils.cpp index 2c8a6ae78..4c19c8810 100644 --- a/compiler/luci-interpreter/src/kernels/TestUtils.cpp +++ b/compiler/luci-interpreter/src/kernels/TestUtils.cpp @@ -17,6 +17,8 @@ #include "kernels/TestUtils.h" +#include <stdexcept> + namespace luci_interpreter { namespace kernels @@ -34,7 +36,25 @@ Tensor makeOutputTensor(DataType element_type, float scale, int32_t zero_point) return Tensor(element_type, {}, {{scale}, {zero_point}}, ""); } -std::vector<Matcher<float>> ArrayFloatNear(const std::vector<float> &values, float max_abs_error) +std::vector<float> dequantizeTensorData(const Tensor &tensor) +{ + if (tensor.element_type() == DataType::U8) + { + return 
dequantize(extractTensorData<uint8_t>(tensor), tensor.scale(), tensor.zero_point()); + } + else if (tensor.element_type() == DataType::S16) + { + // S16 quantization is symmetric, so zero point should be zero. + assert(tensor.zero_point() == 0); + return dequantize(extractTensorData<int16_t>(tensor), tensor.scale(), 0); + } + else + { + throw std::runtime_error("Unsupported type."); + } +} + +Matcher<std::vector<float>> FloatArrayNear(const std::vector<float> &values, float max_abs_error) { std::vector<Matcher<float>> matchers; matchers.reserve(values.size()); @@ -42,7 +62,7 @@ std::vector<Matcher<float>> ArrayFloatNear(const std::vector<float> &values, flo { matchers.emplace_back(FloatNear(v, max_abs_error)); } - return matchers; + return ElementsAreArray(matchers); } std::vector<int32_t> extractTensorShape(const Tensor &tensor) diff --git a/compiler/luci-interpreter/src/kernels/TestUtils.h b/compiler/luci-interpreter/src/kernels/TestUtils.h index 5311a1949..e5bd6a2db 100644 --- a/compiler/luci-interpreter/src/kernels/TestUtils.h +++ b/compiler/luci-interpreter/src/kernels/TestUtils.h @@ -32,6 +32,9 @@ namespace kernels namespace testing { +template <typename T> +std::vector<T> quantize(const std::vector<float> &data, float scale, int32_t zero_point); + template <DataType DT> Tensor makeInputTensor(const Shape &shape, const std::vector<typename DataTypeImpl<DT>::Type> &data) { @@ -40,6 +43,17 @@ Tensor makeInputTensor(const Shape &shape, const std::vector<typename DataTypeIm return tensor; } +template <DataType DT> +Tensor makeInputTensor(const Shape &shape, float scale, int32_t zero_point, + const std::vector<float> &data) +{ + using NativeT = typename DataTypeImpl<DT>::Type; + Tensor tensor(DT, shape, {{scale}, {zero_point}}, ""); + std::vector<NativeT> quantized_data = quantize<NativeT>(data, scale, zero_point); + tensor.writeData(quantized_data.data(), quantized_data.size() * sizeof(NativeT)); + return tensor; +} + Tensor makeOutputTensor(DataType element_type); Tensor makeOutputTensor(DataType element_type, float scale, int32_t zero_point); @@ -65,27 +79,44 @@ template <typename T> std::vector<T> extractTensorData(const Tensor &tensor) return std::vector<T>(data_ptr, data_ptr + tensor.shape().num_elements()); } -std::vector<::testing::Matcher<float>> ArrayFloatNear(const std::vector<float> &values, +std::vector<float> dequantizeTensorData(const Tensor &tensor); + +// Array version of `::testing::FloatNear` matcher. +::testing::Matcher<std::vector<float>> FloatArrayNear(const std::vector<float> &values, float max_abs_error = 1.0e-5f); template <typename T> -inline std::vector<T> quantize(const std::vector<float> &data, float scale, int32_t zero_point) +std::vector<T> quantize(const std::vector<float> &data, float scale, int32_t zero_point) { - assert(!std::is_floating_point<T>::value); + static_assert(std::is_integral<T>::value, "Integral type expected."); + + float q_min{}, q_max{}; + if (std::is_signed<T>::value) + { + // For now, assume that signed type implies signed symmetric quantization. 
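  // e.g. int8 then covers [-127, 127]: dropping -128 keeps the range symmetric around a
  // zero point of 0, which is the same convention dequantizeTensorData() assumes for S16.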
+ assert(zero_point == 0); + q_min = -std::numeric_limits<T>::max(); + q_max = std::numeric_limits<T>::max(); + } + else + { + q_min = 0; + q_max = std::numeric_limits<T>::max(); + } + std::vector<T> q; for (const auto &f : data) { - q.push_back(static_cast<T>(std::max<float>( - std::numeric_limits<T>::lowest(), - std::min<float>(std::numeric_limits<T>::max(), std::round(zero_point + (f / scale)))))); + q.push_back(static_cast<T>( + std::max<float>(q_min, std::min<float>(q_max, std::round(zero_point + (f / scale)))))); } return q; } template <typename T> -inline std::vector<float> dequantize(const std::vector<T> &data, float scale, int32_t zero_point) +std::vector<float> dequantize(const std::vector<T> &data, float scale, int32_t zero_point) { - assert(!std::is_floating_point<T>::value); + static_assert(std::is_integral<T>::value, "Integral type expected."); std::vector<float> f; for (const T &q : data) { @@ -94,18 +125,16 @@ inline std::vector<float> dequantize(const std::vector<T> &data, float scale, in return f; } +// NOTE Returns scale and zero point for _asymmetric_ range (both signed and unsigned). template <typename T> std::pair<float, int32_t> quantizationParams(float f_min, float f_max) { - if (std::is_floating_point<T>::value) - { - return {1.0f, 0}; - } + static_assert(std::is_integral<T>::value, "Integral type expected."); int32_t zero_point = 0; - double scale = 0; + float scale = 0; const T qmin = std::numeric_limits<T>::lowest(); const T qmax = std::numeric_limits<T>::max(); - const double qmin_double = qmin; - const double qmax_double = qmax; + const float qmin_double = qmin; + const float qmax_double = qmax; // 0 should always be a representable value. Let's assume that the initial // min,max range contains 0. assert(f_max >= 0); @@ -131,16 +160,16 @@ template <typename T> std::pair<float, int32_t> quantizationParams(float f_min, // The arithmetic error on the zero point computed from either pair // will be roughly machine_epsilon * (sum of absolute values of terms) // so we want to use the variant that adds the smaller terms. - const double zero_point_from_min = qmin_double - f_min / scale; - const double zero_point_from_max = qmax_double - f_max / scale; + const float zero_point_from_min = qmin_double - f_min / scale; + const float zero_point_from_max = qmax_double - f_max / scale; - const double zero_point_from_min_error = std::abs(qmin_double) + std::abs(f_min / scale); + const float zero_point_from_min_error = std::abs(qmin_double) + std::abs(f_min / scale); - const double zero_point_from_max_error = std::abs(qmax_double) + std::abs(f_max / scale); + const float zero_point_from_max_error = std::abs(qmax_double) + std::abs(f_max / scale); - const double zero_point_double = zero_point_from_min_error < zero_point_from_max_error - ? zero_point_from_min - : zero_point_from_max; + const float zero_point_double = zero_point_from_min_error < zero_point_from_max_error + ? 
zero_point_from_min + : zero_point_from_max; // Now we need to nudge the zero point to be an integer // (our zero points are integer, and this is motivated by the requirement @@ -168,7 +197,7 @@ template <typename T> std::pair<float, int32_t> quantizationParams(float f_min, assert(qmin <= nudged_zero_point); zero_point = nudged_zero_point; // finally, return the values - return {static_cast<float>(scale), zero_point}; + return {scale, zero_point}; } inline float getTolerance(float min, float max, int quantize_steps) diff --git a/compiler/luci-interpreter/src/kernels/Transpose.test.cpp b/compiler/luci-interpreter/src/kernels/Transpose.test.cpp index 87e6e2a00..1c99223a8 100644 --- a/compiler/luci-interpreter/src/kernels/Transpose.test.cpp +++ b/compiler/luci-interpreter/src/kernels/Transpose.test.cpp @@ -29,14 +29,11 @@ using namespace testing; template <typename T> void Check(std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> perm_shape, std::initializer_list<int32_t> output_shape, std::initializer_list<T> input_data, - std::initializer_list<int32_t> perm_data, std::initializer_list<T> output_data, - DataType element_type) + std::initializer_list<int32_t> perm_data, std::initializer_list<T> output_data) { - Tensor input_tensor{element_type, input_shape, {}, ""}; - input_tensor.writeData(input_data.begin(), input_data.size() * sizeof(T)); - - Tensor perm_tensor{DataType::S32, perm_shape, {}, ""}; - perm_tensor.writeData(perm_data.begin(), perm_data.size() * sizeof(int32_t)); + constexpr DataType element_type = getElementType<T>(); + Tensor input_tensor = makeInputTensor<element_type>(input_shape, input_data); + Tensor perm_tensor = makeInputTensor<DataType::S32>(perm_shape, perm_data); Tensor output_tensor = makeOutputTensor(element_type); Transpose kernel(&input_tensor, &perm_tensor, &output_tensor); @@ -60,8 +57,7 @@ TYPED_TEST(TransposeTest, Small3D) 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}, /*perm_data=*/{2, 0, 1}, /*output_data=*/{0, 4, 8, 12, 16, 20, 1, 5, 9, 13, 17, 21, - 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23}, - getElementType<TypeParam>()); + 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23}); } TYPED_TEST(TransposeTest, Large4D) @@ -84,8 +80,7 @@ TYPED_TEST(TransposeTest, Large4D) 10, 11, 12, 13, 14, 30, 31, 32, 33, 34, 50, 51, 52, 53, 54, 70, 71, 72, 73, 74, 90, 91, 92, 93, 94, 110, 111, 112, 113, 114, 15, 16, 17, 18, 19, 35, 36, 37, 38, 39, 55, 56, 57, 58, 59, - 75, 76, 77, 78, 79, 95, 96, 97, 98, 99, 115, 116, 117, 118, 119}, - getElementType<TypeParam>()); + 75, 76, 77, 78, 79, 95, 96, 97, 98, 99, 115, 116, 117, 118, 119}); } TYPED_TEST(TransposeTest, Large2D) @@ -101,15 +96,13 @@ TYPED_TEST(TransposeTest, Large2D) 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119}, /*perm_data=*/{1, 0}, - /*output_data=*/{0, 12, 24, 36, 48, 60, 72, 84, 96, 108, 1, 13, 25, 37, 49, - 61, 73, 85, 97, 109, 2, 14, 26, 38, 50, 62, 74, 86, 98, 110, - 3, 15, 27, 39, 51, 63, 75, 87, 99, 111, 4, 16, 28, 40, 52, - 64, 76, 88, 100, 112, 5, 17, 29, 41, 53, 65, 77, 89, 101, 113, - 6, 18, 30, 42, 54, 66, 78, 90, 102, 114, 7, 19, 31, 43, 55, - 67, 79, 91, 103, 115, 8, 20, 32, 44, 56, 68, 80, 92, 104, 116, - 9, 21, 33, 45, 57, 69, 81, 93, 105, 117, 10, 22, 34, 46, 58, - 70, 82, 94, 106, 118, 11, 23, 35, 47, 59, 71, 83, 95, 107, 119}, - getElementType<TypeParam>()); + /*output_data=*/{ + 0, 12, 24, 36, 48, 60, 72, 84, 96, 108, 1, 13, 25, 37, 49, 61, 73, 85, 97, 109, + 2, 14, 26, 38, 50, 62, 
74, 86, 98, 110, 3, 15, 27, 39, 51, 63, 75, 87, 99, 111, + 4, 16, 28, 40, 52, 64, 76, 88, 100, 112, 5, 17, 29, 41, 53, 65, 77, 89, 101, 113, + 6, 18, 30, 42, 54, 66, 78, 90, 102, 114, 7, 19, 31, 43, 55, 67, 79, 91, 103, 115, + 8, 20, 32, 44, 56, 68, 80, 92, 104, 116, 9, 21, 33, 45, 57, 69, 81, 93, 105, 117, + 10, 22, 34, 46, 58, 70, 82, 94, 106, 118, 11, 23, 35, 47, 59, 71, 83, 95, 107, 119}); } } // namespace diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.cpp b/compiler/luci-interpreter/src/kernels/TransposeConv.cpp index 898bae3da..07d92f07f 100644 --- a/compiler/luci-interpreter/src/kernels/TransposeConv.cpp +++ b/compiler/luci-interpreter/src/kernels/TransposeConv.cpp @@ -43,18 +43,6 @@ void TransposeConv::configure() assert(input()->element_type() == DataType::FLOAT32 || input()->element_type() == DataType::U8); assert(input()->element_type() == output()->element_type()); assert(input()->shape().dim(3) == filter()->shape().dim(3)); - if (input()->element_type() == DataType::U8) - { - _scratch_tensor = - std::make_unique<Tensor>(DataType::S32, output()->shape(), AffineQuantization{}, ""); - double real_multiplier = 0.0; - const double input_product_scale = input()->scale() * filter()->scale(); - assert(input_product_scale >= 0); - real_multiplier = input_product_scale / output()->scale(); - int exponent; - quantizeMultiplier(real_multiplier, &_output_multiplier, &exponent); - _output_shift = -exponent; - } const int num_dims = output_shape()->shape().dim(0); Shape out_shape(num_dims); @@ -62,6 +50,31 @@ void TransposeConv::configure() for (int i = 0; i < num_dims; i++) out_shape.dim(i) = shape_data[i]; output()->resize(out_shape); + + const int32_t filter_height = filter()->shape().dim(1); + const int32_t filter_width = filter()->shape().dim(2); + const int32_t output_height = out_shape.dim(1); + const int32_t output_width = out_shape.dim(2); + + const int32_t unused_output_height = + computeOutputSize(params().padding, output_height, filter_height, params().stride_height, 1); + const int32_t unused_output_width = + computeOutputSize(params().padding, output_width, filter_width, params().stride_width, 1); + + _padding_height = + computePadding(params().stride_height, 1, output_height, filter_height, unused_output_height); + _padding_width = + computePadding(params().stride_width, 1, output_width, filter_width, unused_output_width); + + if (input()->element_type() == DataType::U8) + { + _scratch_tensor = + std::make_unique<Tensor>(DataType::S32, output()->shape(), AffineQuantization{}, ""); + const double input_product_scale = input()->scale() * filter()->scale(); + assert(input_product_scale >= 0); + const double real_multiplier = input_product_scale / output()->scale(); + quantizeMultiplier(real_multiplier, &_output_multiplier, &_output_shift); + } } void TransposeConv::execute() const @@ -81,74 +94,45 @@ void TransposeConv::execute() const void TransposeConv::evalFloat() const { - const int width = output()->shape().dim(2); - const int height = output()->shape().dim(1); - - const int filter_width = filter()->shape().dim(2); - const int filter_height = filter()->shape().dim(1); - - int unused_output_height, unused_output_width; - unused_output_width = - computeOutputSize(params().padding, width, filter_width, params().stride_width, 1); - unused_output_height = - computeOutputSize(params().padding, height, filter_height, params().stride_height, 1); - int32_t offset = 0; tflite::ConvParams op_params{}; op_params.padding_type = tflite::PaddingType::kSame; - 
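  // _padding_height and _padding_width are computed once in configure(), from the
  // requested output size, the filter size and the strides.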
op_params.padding_values.height = computePaddingWithOffset( - params().stride_height, 1, height, filter_height, unused_output_height, &offset); - op_params.padding_values.height_offset = offset; - op_params.padding_values.width = computePaddingWithOffset( - params().stride_width, 1, width, filter_width, unused_output_width, &offset); - op_params.padding_values.width_offset = offset; + op_params.padding_values.height = _padding_height; + op_params.padding_values.width = _padding_width; op_params.stride_height = params().stride_height; op_params.stride_width = params().stride_width; op_params.output_multiplier = _output_multiplier; - tflite::reference_ops::TransposeConv( - op_params, getTensorShape(input()), getTensorData<float>(input()), getTensorShape(filter()), - getTensorData<float>(filter()), getTensorShape(bias()), getTensorData<float>(bias()), - getTensorShape(output()), getTensorData<float>(output()), tflite::RuntimeShape(), - (float *)nullptr); + tflite::reference_ops::TransposeConv(op_params, // + getTensorShape(input()), getTensorData<float>(input()), // + getTensorShape(filter()), getTensorData<float>(filter()), // + getTensorShape(bias()), getTensorData<float>(bias()), // + getTensorShape(output()), getTensorData<float>(output()), // + tflite::RuntimeShape(), nullptr); } void TransposeConv::evalQuantized() const { - int32_t input_offset = -input()->zero_point(); - int32_t filter_offset = -filter()->zero_point(); - int32_t output_offset = filter()->zero_point(); - const int width = output()->shape().dim(2); - const int height = output()->shape().dim(1); - - const int filter_width = filter()->shape().dim(2); - const int filter_height = filter()->shape().dim(1); - - int unused_output_height, unused_output_width; - unused_output_width = - computeOutputSize(params().padding, width, filter_width, params().stride_width, 1); - unused_output_height = - computeOutputSize(params().padding, height, filter_height, params().stride_height, 1); - int32_t offset = 0; tflite::ConvParams op_params{}; op_params.padding_type = tflite::PaddingType::kSame; - op_params.padding_values.height = computePaddingWithOffset( - params().stride_height, 1, height, filter_height, unused_output_height, &offset); - op_params.padding_values.width = computePaddingWithOffset( - params().stride_width, 1, width, filter_width, unused_output_width, &offset); + op_params.padding_values.height = _padding_height; + op_params.padding_values.width = _padding_width; op_params.stride_height = params().stride_height; op_params.stride_width = params().stride_width; - op_params.input_offset = input_offset; - op_params.output_offset = output_offset; - op_params.weights_offset = filter_offset; + // The kernel expects input and filter zero points to be negated. + op_params.input_offset = -input()->zero_point(); // Note the '-'. + op_params.weights_offset = -filter()->zero_point(); // Note the '-'. 
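  // The output offset is passed through unnegated, and _output_shift already carries the
  // sign expected by the kernel, so no extra negation is needed below.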
+ op_params.output_offset = output()->zero_point(); op_params.output_multiplier = _output_multiplier; - op_params.output_shift = -_output_shift; + op_params.output_shift = _output_shift; op_params.quantized_activation_min = std::numeric_limits<uint8_t>::min(); op_params.quantized_activation_max = std::numeric_limits<uint8_t>::max(); - tflite::reference_ops::TransposeConv( - op_params, getTensorShape(input()), getTensorData<uint8>(input()), getTensorShape(filter()), - getTensorData<uint8>(filter()), getTensorShape(bias()), getTensorData<int32_t>(bias()), - getTensorShape(output()), getTensorData<uint8>(output()), tflite::RuntimeShape(), - (uint8 *)nullptr, getTensorData<int32_t>(_scratch_tensor.get())); + tflite::reference_ops::TransposeConv(op_params, // + getTensorShape(input()), getTensorData<uint8>(input()), // + getTensorShape(filter()), getTensorData<uint8>(filter()), // + getTensorShape(bias()), getTensorData<int32_t>(bias()), // + getTensorShape(output()), getTensorData<uint8>(output()), // + tflite::RuntimeShape(), nullptr, // + getTensorData<int32_t>(_scratch_tensor.get())); } } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.h b/compiler/luci-interpreter/src/kernels/TransposeConv.h index 3a0eae761..444439c65 100644 --- a/compiler/luci-interpreter/src/kernels/TransposeConv.h +++ b/compiler/luci-interpreter/src/kernels/TransposeConv.h @@ -47,6 +47,8 @@ private: private: std::unique_ptr<Tensor> _scratch_tensor; + int32_t _padding_height{}; + int32_t _padding_width{}; // The scaling factor from input to output (aka the 'real multiplier') can // be represented as a fixed point multiplier plus a left shift. int32_t _output_multiplier = 0; diff --git a/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp b/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp index 0fbe9328b..5a69e7798 100644 --- a/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp +++ b/compiler/luci-interpreter/src/kernels/TransposeConv.test.cpp @@ -28,21 +28,18 @@ using namespace testing; template <typename T, typename B> void Check(std::initializer_list<int32_t> output_shape_shape, - std::initializer_list<int32_t> weight_shape, - std::initializer_list<int32_t> input_data_shape, + std::initializer_list<int32_t> weight_shape, std::initializer_list<int32_t> input_shape, std::initializer_list<int32_t> bias_shape, std::initializer_list<int32_t> output_shape, std::initializer_list<int32_t> output_shape_data, std::initializer_list<T> weight_data, - std::initializer_list<T> input_data_data, std::initializer_list<B> bias_data, + std::initializer_list<T> input_data, std::initializer_list<B> bias_data, std::initializer_list<T> output_data, luci::Padding padding, int32_t stride_height, - int32_t stride_width, DataType element_type) + int32_t stride_width) { - Tensor output_shape_tensor{element_type, output_shape_shape, {}, ""}; - output_shape_tensor.writeData(output_shape_data.begin(), output_shape_data.size() * sizeof(T)); - Tensor weight_tensor{element_type, weight_shape, {}, ""}; - weight_tensor.writeData(weight_data.begin(), weight_data.size() * sizeof(T)); - Tensor input_data_tensor{element_type, input_data_shape, {}, ""}; - input_data_tensor.writeData(input_data_data.begin(), input_data_data.size() * sizeof(T)); - + constexpr DataType element_type = getElementType<T>(); + Tensor output_shape_tensor = + makeInputTensor<DataType::S32>(output_shape_shape, output_shape_data); + Tensor weight_tensor = makeInputTensor<element_type>(weight_shape, weight_data); + 
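The "real multiplier" comment shown in the TransposeConv.h hunk above refers to the encoding produced by quantizeMultiplier(): the real scaling factor is approximated as quantized_multiplier * 2^(shift - 31), with quantized_multiplier a Q31 value. A sketch of that encoding, assuming the usual TFLite scheme; the project's actual implementation lives in kernels/Utils.cpp and may differ in details:

#include <cmath>
#include <cstdint>

void quantizeMultiplierSketch(double real_multiplier, int32_t *quantized_multiplier, int *shift)
{
  if (real_multiplier == 0.0)
  {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }
  const double fraction = std::frexp(real_multiplier, shift); // real = fraction * 2^shift, fraction in [0.5, 1)
  int64_t q = static_cast<int64_t>(std::round(fraction * (1LL << 31)));
  if (q == (1LL << 31)) // rounding can push the fraction up to exactly 1.0
  {
    q /= 2;
    ++(*shift);
  }
  *quantized_multiplier = static_cast<int32_t>(q); // real ~= q * 2^(shift - 31)
}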
Tensor input_data_tensor = makeInputTensor<element_type>(input_shape, input_data); Tensor output_tensor = makeOutputTensor(element_type); TransposeConvParams params{}; @@ -71,14 +68,13 @@ void Check(std::initializer_list<int32_t> output_shape_shape, TEST(TransposeConvTest, FloatSimple) { Check<float, float>( - /*outputShape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 1}, /*input_shape=*/{1, 4, 4, 1}, - /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1}, + /*output_shape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 1}, /*input_shape=*/{1, 4, 4, 1}, + /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 4, 4, 1}, /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9}, /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, /*bias_data=*/{}, /*output_data=*/{29, 62, 83, 75, 99, 192, 237, 198, 207, 372, 417, 330, 263, 446, 485, 365}, - /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1, - getElementType<float>()); + /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1); SUCCEED(); } @@ -86,16 +82,15 @@ TEST(TransposeConvTest, FloatSimple) TEST(TransposeConvTest, FloatTwoFiltersTest) { Check<float, float>( - /*outputShape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 2}, /*input_shape=*/{1, 4, 4, 2}, - /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 4, 4, 1}, + /*output_shape_shape=*/{4}, /*weight_shape=*/{1, 3, 3, 2}, /*input_shape=*/{1, 4, 4, 2}, + /*bias_shape=*/{}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 4, 4, 1}, /*weight_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}, /*input_data=*/{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, /*bias_data=*/{}, - /*output_data=*/{184, 412, 568, 528, 678, 1347, 1689, 1434, 1494, 2715, 3057, 2442, 1968, - 3352, 3652, 2760}, - /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1, - getElementType<float>()); + /*output_data=*/ + {184, 412, 568, 528, 678, 1347, 1689, 1434, 1494, 2715, 3057, 2442, 1968, 3352, 3652, 2760}, + /*params.padding=*/luci::Padding::SAME, /*stride_height=*/1, /*stride_width=*/1); SUCCEED(); } @@ -103,28 +98,61 @@ TEST(TransposeConvTest, FloatTwoFiltersTest) TEST(TransposeConvTest, SimpleBiasTest) { Check<float, float>( - /*outputShape_shape=*/{4}, /*weight_shape=*/{2, 3, 3, 1}, + /*output_shape_shape=*/{4}, /*weight_shape=*/{2, 3, 3, 1}, /*input_shape=*/{1, 2, 2, 1}, - /*bias_shape=*/{2}, /*output_shape=*/{1, 4, 4, 1}, /*outputShape_data=*/{1, 5, 5, 2}, + /*bias_shape=*/{2}, /*output_shape=*/{1, 4, 4, 1}, /*output_shape_data=*/{1, 5, 5, 2}, /*weight_data=*/{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18}, /*input_data=*/{1, 2, 3, 4}, /*bias_data=*/{3, 4}, /*output_data=*/{4, 6, 6, 8, 10, 14, 9, 12, 13, 16, 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, 42, 46, 48, 52, 106, 114, 63, 68, 71, 76}, - /*params.padding=*/luci::Padding::VALID, /*stride_height=*/2, /*stride_width=*/2, - getElementType<float>()); + /*params.padding=*/luci::Padding::VALID, /*stride_height=*/2, /*stride_width=*/2); SUCCEED(); } -// TODO Uint8Simple -// Implement GetDequantizedOutput Function. 
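The FloatSimple expectations above can be spot-checked by hand. With SAME padding, stride 1, the 3x3 filter {1..9} and the 4x4 input {1..16}, the derived padding is 1, so the only scatter contributions that land on output(0, 0) are input(0,0)*filter(1,1) + input(0,1)*filter(1,0) + input(1,0)*filter(0,1) + input(1,1)*filter(0,0) = 1*5 + 2*4 + 5*2 + 6*1 = 29, which matches the first entry of output_data.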
-// Create Test for Uint8 Case +TEST(TransposeConvTest, UInt8) +{ + std::vector<float> input_data{1, 2, 3, 4}; + std::vector<float> filter_data{1, 3, 5, 7, 9, 11, 13, 15, 17, 2, 4, 6, 8, 10, 12, 14, 16, 18}; + std::vector<float> bias_data{3, 4}; + std::vector<int32_t> output_shape_data{1, 5, 5, 2}; + std::vector<float> ref_output_data{ + 4, 6, 6, 8, 10, 14, 9, 12, 13, 16, // + 10, 12, 12, 14, 28, 32, 21, 24, 25, 28, // + 19, 24, 27, 32, 65, 76, 45, 52, 57, 64, // + 24, 28, 30, 34, 64, 72, 39, 44, 47, 52, // + 42, 46, 48, 52, 106, 114, 63, 68, 71, 76, // + }; + + // Choose quantization parameters carefully. + auto input_quant = quantizationParams<uint8_t>(-8.0, 7.9375); // s = 1 / 16, zp = 128 + auto filter_quant = quantizationParams<uint8_t>(-24.0, 39.75); // s = 1 / 4, zp = 96 + auto output_quant = quantizationParams<uint8_t>(-64.0, 191.0); // s = 1, zp = 64 + + Tensor input_tensor = makeInputTensor<DataType::U8>({1, 2, 2, 1}, input_quant.first, + input_quant.second, input_data); + Tensor filter_tensor = makeInputTensor<DataType::U8>({2, 3, 3, 1}, filter_quant.first, + filter_quant.second, filter_data); + Tensor bias_tensor = + makeInputTensor<DataType::S32>({2}, input_quant.first * filter_quant.first, 0, bias_data); + Tensor output_shape_tensor = makeInputTensor<DataType::S32>({4}, output_shape_data); + Tensor output_tensor = makeOutputTensor(DataType::U8, output_quant.first, output_quant.second); + + TransposeConvParams params{}; + params.padding = Padding::VALID; + params.stride_height = 2; + params.stride_width = 2; + + TransposeConv kernel(&output_shape_tensor, &filter_tensor, &input_tensor, &bias_tensor, + &output_tensor, params); + kernel.configure(); + kernel.execute(); -// TODO Uint8FiltersTest -// Implement GetDequantizedOutput Function. -// Create Test for Uint8 Case + EXPECT_THAT(extractTensorShape(output_tensor), ::testing::ElementsAreArray(output_shape_data)); + EXPECT_THAT(dequantizeTensorData(output_tensor), FloatArrayNear(ref_output_data)); +} } // namespace } // namespace kernels diff --git a/compiler/luci-interpreter/src/kernels/Utils.cpp b/compiler/luci-interpreter/src/kernels/Utils.cpp index b9e7738a9..52e76a81c 100644 --- a/compiler/luci-interpreter/src/kernels/Utils.cpp +++ b/compiler/luci-interpreter/src/kernels/Utils.cpp @@ -89,20 +89,23 @@ static void calculateActivationRangeQuantizedImpl(Activation activation, int32_t void calculateActivationRangeQuantized(Activation activation, const Tensor *output, int32_t *activation_min, int32_t *activation_max) { + // For now, assume that signed type implies signed symmetric quantization. 
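The scale and zero-point values commented in the UInt8 test above follow from an affine mapping of the chosen [min, max] range onto the uint8 code range [0, 255]; the bias is quantized with scale input_scale * filter_scale because the convolution accumulates input-filter products into int32 at that combined scale. A sketch of what quantizationParams<uint8_t> presumably computes; quantizationParamsSketch is an illustrative stand-in, not the test helper itself:

#include <cmath>
#include <cstdint>
#include <utility>

std::pair<float, int32_t> quantizationParamsSketch(float min, float max)
{
  const float scale = (max - min) / 255.0f; // spread the range over all 256 uint8 codes
  const int32_t zero_point = static_cast<int32_t>(std::round(-min / scale));
  return {scale, zero_point};
}

// Reproduces the commented values:
//   quantizationParamsSketch(-8.0f, 7.9375f) -> {0.0625f, 128}
//   quantizationParamsSketch(-24.0f, 39.75f) -> {0.25f, 96}
//   quantizationParamsSketch(-64.0f, 191.0f) -> {1.0f, 64}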
int32_t qmin{}; int32_t qmax{}; switch (output->element_type()) { case DataType::U8: - qmin = std::numeric_limits<uint8_t>::min(); + qmin = 0; qmax = std::numeric_limits<uint8_t>::max(); break; case DataType::S8: - qmin = std::numeric_limits<int8_t>::min(); + assert(output->zero_point() == 0); + qmin = -std::numeric_limits<int8_t>::max(); qmax = std::numeric_limits<int8_t>::max(); break; case DataType::S16: - qmin = std::numeric_limits<int16_t>::min(); + assert(output->zero_point() == 0); + qmin = -std::numeric_limits<int16_t>::max(); qmax = std::numeric_limits<int16_t>::max(); break; default: diff --git a/compiler/luci-interpreter/src/kernels/Utils.h b/compiler/luci-interpreter/src/kernels/Utils.h index 7927151c6..67bb7581a 100644 --- a/compiler/luci-interpreter/src/kernels/Utils.h +++ b/compiler/luci-interpreter/src/kernels/Utils.h @@ -25,6 +25,7 @@ #include <cassert> #include <cstdint> +#include <stdexcept> namespace luci_interpreter { @@ -70,6 +71,11 @@ inline int32_t computeOutputSize(Padding padding, int32_t image_size, int32_t fi } } +inline int32_t calcOffset(const Shape &shape, int32_t d0, int32_t d1, int32_t d2, int32_t d3) +{ + return ((d0 * shape.dim(1) + d1) * shape.dim(2) + d2) * shape.dim(3) + d3; +} + void calculateActivationRange(Activation activation, float *activation_min, float *activation_max); void calculateActivationRangeQuantized(Activation activation, const Tensor *output, @@ -94,6 +100,14 @@ void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quan Shape calculateShapeForBroadcast(const Shape &input1_shape, const Shape &input2_shape); +inline double getQuantizedConvolutionMultipler(float input_scale, float filter_scale, + float output_scale) +{ + const double input_product_scale = static_cast<double>(input_scale * filter_scale); + LUCI_INTERPRETER_CHECK(input_product_scale >= 0); + return input_product_scale / static_cast<double>(output_scale); +} + inline tflite::RuntimeShape getTensorShape(const Tensor *tensor) { if (tensor == nullptr) |
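Two notes on the Utils changes above. The symmetric ranges in calculateActivationRangeQuantized() drop the most negative code (for example [-127, 127] for S8 rather than [-128, 127]), which is consistent with the zero_point == 0 assertions for symmetric quantization. And getQuantizedConvolutionMultipler() packages the same computation TransposeConv::configure() performs inline: for the UInt8 test above it would give (1/16 * 1/4) / 1 = 0.015625, the real multiplier that quantizeMultiplier() then encodes as a fixed-point multiplier and shift.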