path: root/runtimes/neurun/src/kernel
Diffstat (limited to 'runtimes/neurun/src/kernel')
-rw-r--r--  runtimes/neurun/src/kernel/CMakeLists.txt                            2
-rw-r--r--  runtimes/neurun/src/kernel/acl_cl/CMakeLists.txt                    15
-rw-r--r--  runtimes/neurun/src/kernel/acl_cl/ConcatLayer.cc                   158
-rw-r--r--  runtimes/neurun/src/kernel/acl_cl/ConcatLayer.h                     67
-rw-r--r--  runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.cc   94
-rw-r--r--  runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.h    67
-rw-r--r--  runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.cc     94
-rw-r--r--  runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.h      67
-rw-r--r--  runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc                     118
-rw-r--r--  runtimes/neurun/src/kernel/cpu/AvgPoolLayer.h                       78
-rw-r--r--  runtimes/neurun/src/kernel/cpu/CMakeLists.txt                       14
-rw-r--r--  runtimes/neurun/src/kernel/cpu/ConcatLayer.cc                      109
-rw-r--r--  runtimes/neurun/src/kernel/cpu/ConcatLayer.h                        66
-rw-r--r--  runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc                 202
-rw-r--r--  runtimes/neurun/src/kernel/cpu/ConvolutionLayer.h                   79
-rw-r--r--  runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc              139
-rw-r--r--  runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.h                69
-rw-r--r--  runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc                     118
-rw-r--r--  runtimes/neurun/src/kernel/cpu/MaxPoolLayer.h                       78
-rw-r--r--  runtimes/neurun/src/kernel/cpu/OperationUtils.cc                   230
-rw-r--r--  runtimes/neurun/src/kernel/cpu/OperationUtils.h                    103
-rw-r--r--  runtimes/neurun/src/kernel/cpu/ReshapeLayer.cc                      57
-rw-r--r--  runtimes/neurun/src/kernel/cpu/ReshapeLayer.h                       58
-rw-r--r--  runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc                     128
-rw-r--r--  runtimes/neurun/src/kernel/cpu/SoftMaxLayer.h                       64
-rw-r--r--  runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.cc      90
-rw-r--r--  runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.h       67
-rw-r--r--  runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.cc        90
-rw-r--r--  runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.h         67
29 files changed, 2588 insertions(+), 0 deletions(-)
diff --git a/runtimes/neurun/src/kernel/CMakeLists.txt b/runtimes/neurun/src/kernel/CMakeLists.txt
new file mode 100644
index 000000000..a39823102
--- /dev/null
+++ b/runtimes/neurun/src/kernel/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_subdirectory(cpu)
+add_subdirectory(acl_cl)
diff --git a/runtimes/neurun/src/kernel/acl_cl/CMakeLists.txt b/runtimes/neurun/src/kernel/acl_cl/CMakeLists.txt
new file mode 100644
index 000000000..857fe6fe6
--- /dev/null
+++ b/runtimes/neurun/src/kernel/acl_cl/CMakeLists.txt
@@ -0,0 +1,15 @@
+file(GLOB SOURCES "*.cc")
+
+add_library(${LIB_NEURUN_KERNEL_ACL_CL} STATIC ${SOURCES})
+
+target_include_directories(${LIB_NEURUN_KERNEL_ACL_CL} PUBLIC ${NNFW_INCLUDE_DIR})
+target_include_directories(${LIB_NEURUN_KERNEL_ACL_CL} PUBLIC ${NEURUN_INCLUDE_DIR})
+target_include_directories(${LIB_NEURUN_KERNEL_ACL_CL} PUBLIC ${CMAKE_SOURCE_DIR}/externals/tensorflow) # TODO We should not need this
+
+target_link_libraries(${LIB_NEURUN_KERNEL_ACL_CL} arm_compute)
+target_link_libraries(${LIB_NEURUN_KERNEL_ACL_CL} tensorflow-lite) # TODO We should not need this
+target_link_libraries(${LIB_NEURUN_KERNEL_ACL_CL} ${LIB_NEURUN_KERNEL_CPU}) # TODO We should not need this
+
+set_target_properties(${LIB_NEURUN_KERNEL_ACL_CL} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+set_target_properties(${LIB_NEURUN_KERNEL_ACL_CL} PROPERTIES OUTPUT_NAME kernel_acl_cl)
+install(TARGETS ${LIB_NEURUN_KERNEL_ACL_CL} DESTINATION lib/neurun)
diff --git a/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.cc b/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.cc
new file mode 100644
index 000000000..b75ac90f0
--- /dev/null
+++ b/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.cc
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConcatLayer.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+#include "backend/acl_cl/kernel/View.h"
+#include "logging.h"
+
+namespace
+{
+
+bool matchSizeExceptAxis(const ::arm_compute::ICLTensor *t1, const ::arm_compute::ICLTensor *t2,
+ uint32_t axis)
+{
+ assert(t1->info()->num_dimensions() <= 4);
+ assert(t2->info()->num_dimensions() <= 4);
+
+ for (uint32_t i = 0; i < 4; i++)
+ {
+ if (axis == i)
+ continue;
+ if (t1->info()->dimension(i) != t2->info()->dimension(i))
+ return false;
+ }
+ return true;
+}
+
+} // namespace {anonymous}
+
+namespace neurun
+{
+namespace kernel
+{
+namespace acl_cl
+{
+
+ConcatLayer::ConcatLayer()
+ : _input_allocs(), _output_alloc(nullptr), _axis(0), _input_type(OperandType::SCALAR_FLOAT32)
+{
+ // DO NOTHING
+}
+
+bool ConcatLayer::concatenationFloat32()
+{
+ // Input and output size check
+ {
+ // NOTE Support only tensor with dimension 4 or less
+
+ uint32_t axis_sum = 0;
+
+ for (auto input : _input_allocs)
+ {
+ assert(matchSizeExceptAxis(_output_alloc, input, _axis));
+ axis_sum += input->info()->dimension(_axis);
+ }
+
+ assert(_output_alloc->info()->dimension(_axis) == axis_sum);
+ }
+
+ VERBOSE(Concat_RUN) << "START Concat" << std::endl;
+
+ // Perform operation
+ {
+ uint32_t axis_offset = 0;
+
+ auto &queue = ::arm_compute::CLScheduler::get().queue();
+
+ _output_alloc->map(queue);
+ ::internal::arm_compute::kernel::View<float> output_view{_output_alloc};
+
+ for (auto input : _input_allocs)
+ {
+ input->map(queue);
+ const ::internal::arm_compute::kernel::View<float> input_reader{input};
+
+ for (uint32_t n = 0; n < input_reader.shape().N; n++)
+ {
+ for (uint32_t c = 0; c < input_reader.shape().C; c++)
+ {
+ for (uint32_t h = 0; h < input_reader.shape().H; h++)
+ {
+ for (uint32_t w = 0; w < input_reader.shape().W; w++)
+ {
+ uint32_t no = (_axis == 3) ? axis_offset : 0;
+ uint32_t co = (_axis == 2) ? axis_offset : 0;
+ uint32_t ho = (_axis == 1) ? axis_offset : 0;
+ uint32_t wo = (_axis == 0) ? axis_offset : 0;
+ output_view.at(n + no, c + co, h + ho, w + wo) = input_reader.at(n, c, h, w);
+ }
+ }
+ }
+ }
+ if (_axis == 3)
+ axis_offset += input_reader.shape().N;
+ if (_axis == 2)
+ axis_offset += input_reader.shape().C;
+ if (_axis == 1)
+ axis_offset += input_reader.shape().H;
+ if (_axis == 0)
+ axis_offset += input_reader.shape().W;
+
+ input->unmap(queue);
+ }
+ _output_alloc->unmap(queue);
+ }
+
+ VERBOSE(Concat_RUN) << "End Concat" << std::endl;
+
+ return true;
+}
+
+void ConcatLayer::configure(const std::vector<::arm_compute::ICLTensor *> &input_allocs,
+ int32_t axis, ::arm_compute::ICLTensor *output_alloc)
+{
+ _input_allocs = input_allocs;
+ _output_alloc = output_alloc;
+
+ assert(axis < 4);
+
+  // This map converts an NNAPI axis given in NHWC order to the reversed NCHW
+  // order used by the loops below: NHWC -> WHCN
+ static const uint32_t axis_map[] = {3, 1, 0, 2};
+ _axis = axis_map[axis];
+
+ // TODO Support Quant8
+ _input_type = OperandType::TENSOR_FLOAT32;
+}
+
+void ConcatLayer::run()
+{
+ if (_input_type == OperandType::TENSOR_FLOAT32)
+ {
+ concatenationFloat32();
+ }
+ else if (_input_type == OperandType::TENSOR_QUANT8_ASYMM)
+ {
+ throw std::runtime_error("NYI - concatenationQuant8()");
+ }
+}
+
+} // namespace acl_cl
+} // namespace kernel
+} // namespace neurun
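
Note: the `axis_map` in `configure()` above translates an NNAPI concat axis given in NHWC order into the reversed-NCHW (WHCN) indexing that the nested loops in `concatenationFloat32()` use. A minimal standalone sketch of that translation follows; the helper name is hypothetical and not part of this commit:

    #include <cassert>
    #include <cstdint>

    // Maps an NNAPI axis (NHWC order: 0=N, 1=H, 2=W, 3=C) to the
    // reversed-NCHW index used by the kernel (0=W, 1=H, 2=C, 3=N).
    uint32_t toReversedNCHWAxis(int32_t nhwc_axis)
    {
      assert(nhwc_axis >= 0 && nhwc_axis < 4);
      static const uint32_t axis_map[] = {3, 1, 0, 2};
      return axis_map[nhwc_axis];
    }

    // Example: the NNAPI channel axis (3) maps to 2, which run() then
    // treats as the channel offset (co).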
diff --git a/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.h b/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.h
new file mode 100644
index 000000000..4767721fa
--- /dev/null
+++ b/runtimes/neurun/src/kernel/acl_cl/ConcatLayer.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __INTERNAL_KERNEL_ACL_CL_CONCAT_LAYER_H__
+#define __INTERNAL_KERNEL_ACL_CL_CONCAT_LAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/core/CL/ICLTensor.h>
+#include <arm_compute/runtime/IFunction.h>
+
+#include "graph/operand/DataType.h"
+
+using OperandType = neurun::graph::operand::DataType;
+
+namespace neurun
+{
+namespace kernel
+{
+namespace acl_cl
+{
+
+//
+// neurun::kernel::acl_cl::ConcatLayer
+// A naive implementation of ConcatLayer for ACL
+//
+
+class ConcatLayer : public ::arm_compute::IFunction
+{
+public:
+ ConcatLayer();
+
+public:
+ void configure(const std::vector<::arm_compute::ICLTensor *> &input_allocs,
+ int32_t axis /* NNAPI tensor axis from NHWC order */,
+ ::arm_compute::ICLTensor *output_alloc);
+
+ void run();
+
+private:
+ bool concatenationFloat32();
+
+private:
+ std::vector<::arm_compute::ICLTensor *> _input_allocs;
+ ::arm_compute::ICLTensor *_output_alloc;
+ int32_t _axis;
+ OperandType _input_type;
+};
+
+} // namespace acl_cl
+} // namespace kernel
+} // namespace neurun
+
+#endif // __INTERNAL_KERNEL_ACL_CL_CONCAT_LAYER_H__
diff --git a/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.cc b/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.cc
new file mode 100644
index 000000000..fa1d77579
--- /dev/null
+++ b/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.cc
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
+//
+
+#if 0
+
+#include "TensorConvertFromCommonLayer.h"
+
+#include "internal/nnapi/feature/Reader.h"
+#include "backend/acl_cl/feature/View.h"
+
+#include <util/feature/IndexIterator.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+namespace neurun
+{
+namespace kernel
+{
+namespace acl_cl
+{
+
+void TensorConvertFromCommonLayer::convert()
+{
+ auto inputBuffer = _inputTensor->buffer();
+ auto inputSize = _inputTensor->info()->total_size();
+
+ auto &queue = ::arm_compute::CLScheduler::get().queue();
+
+ _outputTensor->map(queue);
+
+ if (_tensorShape.rank() == 2)
+ {
+ const auto len = _tensorShape.dim(1);
+
+ auto base = reinterpret_cast<const float *>(inputBuffer);
+
+ for (int32_t n = 0; n < len; ++n)
+ {
+ auto from = base + n;
+ auto into =
+ reinterpret_cast<float *>(_outputTensor->ptr_to_element(::arm_compute::Coordinates{n}));
+
+ *into = *from;
+ }
+ }
+ else if (_tensorShape.rank() == 4)
+ {
+ auto featureShape = _tensorShape.asFeature();
+
+ const ::internal::nnapi::feature::Reader<float> from{featureShape, inputBuffer, inputSize};
+ ::internal::arm_compute::feature::View<float> into{_outputTensor};
+
+ ::nnfw::util::feature::iterate(featureShape)
+ << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, ch, row, col);
+ into.at(batch, ch, row, col) = value;
+ };
+ }
+
+ _outputTensor->unmap(queue);
+}
+
+void TensorConvertFromCommonLayer::configure(::internal::common::Tensor *inputTensor,
+ ::arm_compute::ICLTensor *outputTensor,
+ const ::neurun::graph::operand::Shape &tensorShape)
+{
+ _inputTensor = inputTensor;
+ _outputTensor = outputTensor;
+ _tensorShape = tensorShape;
+}
+
+void TensorConvertFromCommonLayer::run() { convert(); }
+
+} // namespace acl_cl
+} // namespace kernel
+} // namespace neurun
+
+#endif
diff --git a/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.h b/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.h
new file mode 100644
index 000000000..bd031a106
--- /dev/null
+++ b/runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
+//
+
+#if 0
+
+#ifndef __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_FROM_COMMON_LAYER_H__
+#define __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_FROM_COMMON_LAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/runtime/IFunction.h>
+#include <arm_compute/core/CL/ICLTensor.h>
+
+#include "internal/Model.h"
+#include "internal/common/Tensor.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace acl_cl
+{
+
+class TensorConvertFromCommonLayer : public ::arm_compute::IFunction
+{
+public:
+ TensorConvertFromCommonLayer() {}
+
+public:
+  void convert();
+
+ void configure(::internal::common::Tensor *inputTensor, ::arm_compute::ICLTensor *outputTensor,
+ const ::neurun::graph::operand::Shape &tensorShape);
+
+ void run();
+
+private:
+ ::internal::common::Tensor *_inputTensor;
+ ::arm_compute::ICLTensor *_outputTensor;
+
+ ::neurun::graph::operand::Shape _tensorShape{1};
+};
+
+} // namespace acl_cl
+} // namespace kernel
+} // namespace neurun
+
+#endif // __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_FROM_COMMON_LAYER_H__
+
+#endif
diff --git a/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.cc b/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.cc
new file mode 100644
index 000000000..985524bc3
--- /dev/null
+++ b/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.cc
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
+//
+
+#if 0
+
+#include "TensorConvertToCommonLayer.h"
+
+#include "backend/acl_cl/feature/View.h"
+#include "internal/nnapi/feature/View.h"
+
+#include <util/feature/IndexIterator.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+namespace neurun
+{
+namespace kernel
+{
+namespace acl_cl
+{
+
+void TensorConvertToCommonLayer::convert()
+{
+ auto outputBuffer = _outputTensor->buffer();
+ auto outputSize = _outputTensor->info()->total_size();
+
+ auto &queue = ::arm_compute::CLScheduler::get().queue();
+
+ _inputTensor->map(queue);
+
+ if (_tensorShape.rank() == 2)
+ {
+ const auto len = _tensorShape.dim(1);
+
+ auto base = reinterpret_cast<float *>(outputBuffer);
+
+ for (int32_t n = 0; n < len; ++n)
+ {
+ auto from = reinterpret_cast<const float *>(
+ _inputTensor->ptr_to_element(::arm_compute::Coordinates{n}));
+ auto into = base + n;
+
+ *into = *from;
+ }
+ }
+ else if (_tensorShape.rank() == 4)
+ {
+ auto featureShape = _tensorShape.asFeature();
+
+ const ::internal::arm_compute::feature::View<float> from{_inputTensor};
+ ::internal::nnapi::feature::View<float> into{featureShape, outputBuffer, outputSize};
+
+ ::nnfw::util::feature::iterate(featureShape)
+ << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, ch, row, col);
+ into.at(batch, ch, row, col) = value;
+ };
+ }
+
+ _inputTensor->unmap(queue);
+}
+
+void TensorConvertToCommonLayer::configure(::arm_compute::ICLTensor *inputTensor,
+ ::internal::common::Tensor *outputTensor,
+ const ::neurun::graph::operand::Shape &tensorShape)
+{
+ _inputTensor = inputTensor;
+ _outputTensor = outputTensor;
+ _tensorShape = tensorShape;
+}
+
+void TensorConvertToCommonLayer::run() { convert(); }
+
+} // namespace acl_cl
+} // namespace kernel
+} // namespace neurun
+
+#endif
diff --git a/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.h b/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.h
new file mode 100644
index 000000000..576f1ee71
--- /dev/null
+++ b/runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
+//
+
+#if 0
+
+#ifndef __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_TO_COMMON_LAYER_H__
+#define __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_TO_COMMON_LAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/runtime/IFunction.h>
+#include <arm_compute/core/CL/ICLTensor.h>
+
+#include "internal/Model.h"
+#include "internal/common/Tensor.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace acl_cl
+{
+
+class TensorConvertToCommonLayer : public ::arm_compute::IFunction
+{
+public:
+ TensorConvertToCommonLayer() {}
+
+public:
+  void convert();
+
+ void configure(::arm_compute::ICLTensor *inputTensor, ::internal::common::Tensor *outputTensor,
+ const ::neurun::graph::operand::Shape &tensorShape);
+
+ void run();
+
+private:
+ ::arm_compute::ICLTensor *_inputTensor;
+ ::internal::common::Tensor *_outputTensor;
+
+ ::neurun::graph::operand::Shape _tensorShape{1};
+};
+
+} // namespace acl_cl
+} // namespace kernel
+} // namespace neurun
+
+#endif // __INTERNAL_KERNELS_ACL_CL_TENSOR_CONVERT_TO_COMMON_LAYER_H__
+
+#endif
diff --git a/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc b/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc
new file mode 100644
index 000000000..2a6a84e10
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "AvgPoolLayer.h"
+
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "kernel/cpu/OperationUtils.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+#define AVGPOOLING_PARAMETERS \
+ uint32_t height = getSizeOfDimension(_inputShape, 1); \
+ uint32_t width = getSizeOfDimension(_inputShape, 2); \
+ uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \
+ uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \
+ \
+ uint32_t paddingHeight = (uint32_t)_paddingTop; \
+ uint32_t paddingWidth = (uint32_t)_paddingLeft;
+
+AvgPoolLayer::AvgPoolLayer()
+ : _inputData(nullptr), _outputData(nullptr), _inputShape(), _outputShape(), _paddingLeft(0),
+ _paddingTop(0), _paddingRight(0), _paddingBottom(0), _strideWidth(0), _strideHeight(0),
+ _kernelWidth(0), _kernelHeight(0), _activation(ANEURALNETWORKS_FUSED_NONE),
+ _inputType(OperandType::SCALAR_FLOAT32)
+{
+ // DO NOTHING
+}
+
+bool AvgPoolLayer::averagePoolFloat32()
+{
+
+ AVGPOOLING_PARAMETERS
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+
+ ::tflite::optimized_ops::AveragePool(
+ reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape), _strideWidth,
+ _strideHeight, paddingWidth, paddingHeight, _kernelWidth, _kernelHeight,
+ output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
+ convertShapeToDims(_outputShape));
+ return true;
+}
+bool AvgPoolLayer::averagePoolQuant8()
+{
+
+ AVGPOOLING_PARAMETERS
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
+ &output_activation_max);
+
+ ::tflite::optimized_ops::AveragePool(_inputData, convertShapeToDims(_inputShape), _strideWidth,
+ _strideHeight, paddingWidth, paddingHeight, _kernelWidth,
+ _kernelHeight, output_activation_min, output_activation_max,
+ _outputData, convertShapeToDims(_outputShape));
+ return true;
+}
+
+void AvgPoolLayer::configure(uint8_t *inputData, const Shape inputShape, const uint32_t paddingLeft,
+ const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideWidth,
+ const uint32_t strideHeight, const uint32_t kernelWidth,
+ const uint32_t kernelHeight, const FuseCode activation,
+ uint8_t *outputData, const Shape outputShape)
+{
+ _inputData = inputData;
+ _inputShape = inputShape;
+ _inputType = inputShape.type;
+ _paddingLeft = paddingLeft;
+ _paddingRight = paddingRight;
+ _paddingTop = paddingTop;
+ _paddingBottom = paddingBottom;
+ _strideWidth = strideWidth;
+ _strideHeight = strideHeight;
+ _kernelWidth = kernelWidth;
+ _kernelHeight = kernelHeight;
+ _activation = activation;
+ _outputData = outputData;
+ _outputShape = outputShape;
+}
+
+void AvgPoolLayer::run()
+{
+ if (_inputType == OperandType::TENSOR_FLOAT32)
+ {
+ averagePoolFloat32();
+ }
+ else if (_inputType == OperandType::TENSOR_QUANT8_ASYMM)
+ {
+ throw std::runtime_error{"AvgPoolLayer : Not tested for TENSOR_QUANT8_ASYMM"};
+ // averagePoolQuant8();
+ }
+}
+
+#undef AVGPOOLING_PARAMETERS
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
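
Note: `configure()` takes explicit per-side paddings rather than a padding scheme, so the input, kernel, stride, and padding values must already satisfy the usual pooled-extent relation in each spatial dimension. A small sketch of that relation, with a hypothetical helper name:

    #include <cstdint>

    // Output extent for one spatial dimension given explicit padding;
    // the shapes passed to configure() are expected to satisfy this.
    uint32_t pooledExtent(uint32_t in, uint32_t kernel, uint32_t stride,
                          uint32_t pad_head, uint32_t pad_tail)
    {
      return (in + pad_head + pad_tail - kernel) / stride + 1;
    }

    // e.g. pooledExtent(224, 2, 2, 0, 0) == 112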
diff --git a/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.h b/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.h
new file mode 100644
index 000000000..9f390a9e1
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_KERNEL_CPU_AVGPOOLLAYER_H__
+#define __NEURUN_KERNEL_CPU_AVGPOOLLAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/runtime/IFunction.h>
+
+#include "kernel/cpu/OperationUtils.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+class AvgPoolLayer : public ::arm_compute::IFunction
+{
+public:
+ AvgPoolLayer();
+
+public:
+ bool averagePoolFloat32();
+
+ bool averagePoolQuant8();
+
+ void configure(uint8_t *inputData, const Shape inputShape, const uint32_t paddingLeft,
+ const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideWidth,
+ const uint32_t strideHeight, const uint32_t kernelWidth,
+ const uint32_t kernelHeight, const FuseCode activation, uint8_t *outputData,
+ const Shape outputShape);
+
+ void run();
+
+private:
+ uint8_t *_inputData;
+ uint8_t *_outputData;
+
+ Shape _inputShape;
+ Shape _outputShape;
+
+ uint32_t _paddingLeft;
+ uint32_t _paddingTop;
+ uint32_t _paddingRight;
+ uint32_t _paddingBottom;
+
+ uint32_t _strideWidth;
+ uint32_t _strideHeight;
+ uint32_t _kernelWidth;
+ uint32_t _kernelHeight;
+
+ FuseCode _activation;
+
+ OperandType _inputType;
+};
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
+
+#endif // __NEURUN_KERNEL_CPU_AVGPOOLLAYER_H__
diff --git a/runtimes/neurun/src/kernel/cpu/CMakeLists.txt b/runtimes/neurun/src/kernel/cpu/CMakeLists.txt
new file mode 100644
index 000000000..dddf154c3
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/CMakeLists.txt
@@ -0,0 +1,14 @@
+file(GLOB SOURCES "*.cc")
+
+add_library(${LIB_NEURUN_KERNEL_CPU} STATIC ${SOURCES})
+
+target_include_directories(${LIB_NEURUN_KERNEL_CPU} PUBLIC ${NNFW_INCLUDE_DIR})
+target_include_directories(${LIB_NEURUN_KERNEL_CPU} PUBLIC ${NEURUN_INCLUDE_DIR})
+target_include_directories(${LIB_NEURUN_KERNEL_CPU} PUBLIC ${CMAKE_SOURCE_DIR}/externals/tensorflow)
+
+target_link_libraries(${LIB_NEURUN_KERNEL_CPU} arm_compute) # TODO We should not need this
+target_link_libraries(${LIB_NEURUN_KERNEL_CPU} tensorflow-lite)
+
+set_target_properties(${LIB_NEURUN_KERNEL_CPU} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+set_target_properties(${LIB_NEURUN_KERNEL_CPU} PROPERTIES OUTPUT_NAME kernel_cpu)
+install(TARGETS ${LIB_NEURUN_KERNEL_CPU} DESTINATION lib/neurun)
diff --git a/runtimes/neurun/src/kernel/cpu/ConcatLayer.cc b/runtimes/neurun/src/kernel/cpu/ConcatLayer.cc
new file mode 100644
index 000000000..5fe5e3993
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/ConcatLayer.cc
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConcatLayer.h"
+
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "kernel/cpu/OperationUtils.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+ConcatLayer::ConcatLayer()
+ : _inputDataPtrs(), _outputData(nullptr), _axis(0), _inputShapes(), _outputShape(),
+ _inputType(OperandType::SCALAR_FLOAT32)
+{
+ // DO NOTHING
+}
+
+bool ConcatLayer::concatenationFloat32()
+{
+ int num_inputs = _inputShapes.size();
+ std::vector<::tflite::Dims<4> *> inputDimsPtr(num_inputs);
+ std::vector<::tflite::Dims<4>> inputDims(num_inputs);
+ for (int i = 0; i < num_inputs; i++)
+ {
+ inputDims[i] = convertShapeToDims(_inputShapes[i]);
+ inputDimsPtr[i] = &inputDims[i];
+ }
+
+ std::vector<const float *> inputFloatPtrs;
+
+ for (auto ptr : _inputDataPtrs)
+ {
+ inputFloatPtrs.emplace_back(reinterpret_cast<const float *>(ptr));
+ }
+
+ ::tflite::optimized_ops::Concatenation<::tflite::FusedActivationFunctionType::kNone, float>(
+ getNumberOfDimensions(_outputShape) - _axis - 1, inputFloatPtrs.data(), inputDimsPtr.data(),
+ num_inputs, reinterpret_cast<float *>(_outputData), convertShapeToDims(_outputShape));
+ return true;
+}
+bool ConcatLayer::concatenationQuant8()
+{
+ int num_inputs = _inputShapes.size();
+ std::vector<::tflite::Dims<4> *> inputDimsPtr(num_inputs);
+ std::vector<::tflite::Dims<4>> inputDims(num_inputs);
+ for (int i = 0; i < num_inputs; i++)
+ {
+ inputDims[i] = convertShapeToDims(_inputShapes[i]);
+ inputDimsPtr[i] = &inputDims[i];
+ }
+ ::tflite::optimized_ops::Concatenation<::tflite::FusedActivationFunctionType::kNone, uint8_t>(
+ getNumberOfDimensions(_outputShape) - _axis - 1, _inputDataPtrs.data(), inputDimsPtr.data(),
+ num_inputs, _outputData, convertShapeToDims(_outputShape));
+ return true;
+}
+
+void ConcatLayer::configure(const std::vector<const uint8_t *> &inputDataPtrs,
+ const std::vector<Shape> &inputShapes, int32_t axis,
+ uint8_t *outputData, const Shape outputShape)
+{
+ _inputDataPtrs = inputDataPtrs;
+
+ for (auto shape : inputShapes)
+ {
+ _inputShapes.emplace_back(shape);
+ _inputType = shape.type;
+ }
+
+ _axis = axis;
+
+ _outputData = outputData;
+ _outputShape = outputShape;
+}
+
+void ConcatLayer::run()
+{
+ if (_inputType == OperandType::TENSOR_FLOAT32)
+ {
+ concatenationFloat32();
+ }
+ else if (_inputType == OperandType::TENSOR_QUANT8_ASYMM)
+ {
+ throw std::runtime_error{"ConcatLayer : Not tested for TENSOR_QUANT8_ASYMM"};
+ // concatenationQuant8();
+ }
+}
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
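
Note: `tflite::Dims<4>` stores dimensions innermost-first, so an NNAPI axis counted from the outermost dimension has to be flipped before calling `Concatenation`; that is what the `getNumberOfDimensions(_outputShape) - _axis - 1` expression above computes. A one-line sketch with a hypothetical helper name:

    // tflite counts the concat axis from the innermost dimension:
    // e.g. rank 4, NNAPI channel axis 3 -> 4 - 3 - 1 = 0 (innermost).
    inline int toTfliteConcatAxis(int rank, int nnapi_axis) { return rank - nnapi_axis - 1; }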
diff --git a/runtimes/neurun/src/kernel/cpu/ConcatLayer.h b/runtimes/neurun/src/kernel/cpu/ConcatLayer.h
new file mode 100644
index 000000000..9aacab5e8
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/ConcatLayer.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_KERNEL_CPU_CONCATLAYER_H__
+#define __NEURUN_KERNEL_CPU_CONCATLAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/runtime/IFunction.h>
+
+#include "kernel/cpu/OperationUtils.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+class ConcatLayer : public ::arm_compute::IFunction
+{
+public:
+ ConcatLayer();
+
+public:
+ bool concatenationFloat32();
+
+ bool concatenationQuant8();
+
+ void configure(const std::vector<const uint8_t *> &inputDataPtrs,
+ const std::vector<Shape> &inputShapes, int32_t axis, uint8_t *outputData,
+ const Shape outputShape);
+
+ void run();
+
+private:
+ std::vector<const uint8_t *> _inputDataPtrs;
+ uint8_t *_outputData;
+
+ int32_t _axis;
+
+ std::vector<Shape> _inputShapes;
+ Shape _outputShape;
+
+ OperandType _inputType;
+};
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
+
+#endif // __NEURUN_KERNEL_CPU_CONCATLAYER_H__
diff --git a/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc
new file mode 100644
index 000000000..81e88e0f0
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ConvolutionLayer.h"
+
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "kernel/cpu/OperationUtils.h"
+
+#include <mutex>
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+// If possible we will use this static buffer for the im2col scratch tensor.
+static constexpr int kStaticBufferSize = 1605632;
+static char static_scratch_buffer[kStaticBufferSize];
+static std::mutex executionMutex;
+
+#define ANDROID_NN_CONV_PARAMETERS(Type) \
+ uint32_t height = getSizeOfDimension(_inputShape, 1); \
+ uint32_t width = getSizeOfDimension(_inputShape, 2); \
+ uint32_t kernelHeight = getSizeOfDimension(_kernelShape, 1); \
+ uint32_t kernelWidth = getSizeOfDimension(_kernelShape, 2); \
+ uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \
+ uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \
+ uint32_t inDepth = getSizeOfDimension(_inputShape, 3); \
+ \
+ uint32_t paddingHeight = (uint32_t)_paddingTop; \
+ uint32_t paddingWidth = (uint32_t)_paddingLeft; \
+ \
+ ::tflite::Dims<4> im2colDim; \
+ im2colDim.sizes[3] = (int)getSizeOfDimension(_outputShape, 0); \
+ im2colDim.sizes[2] = (int)getSizeOfDimension(_outputShape, 1); \
+ im2colDim.sizes[1] = (int)getSizeOfDimension(_outputShape, 2); \
+ im2colDim.sizes[0] = (int)inDepth * kernelHeight * kernelWidth; \
+ \
+ im2colDim.strides[0] = 1; \
+ for (int i = 1; i < 4; i++) \
+ { \
+ im2colDim.strides[i] = im2colDim.strides[i - 1] * im2colDim.sizes[i - 1]; \
+ } \
+ Type *im2colData = nullptr; \
+ uint64_t im2colByteSize = sizeof(Type); \
+ std::unique_ptr<Type[]> im2colGuard; \
+ for (int i = 0; i < 4; i++) \
+ { \
+ im2colByteSize *= im2colDim.sizes[i]; \
+ } \
+ /* http://b/77982879, tflite::optimized_ops::Conv uses int for offsets */ \
+ if (im2colByteSize >= 0x7fffffff) \
+ { \
+ std::cout << "Conv size is too large, not enough memory" << std::endl; \
+ return false; \
+ } \
+ if (im2colByteSize <= kStaticBufferSize) \
+ { \
+ im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \
+ } \
+ else \
+ { \
+ im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \
+ if (im2colData == nullptr) \
+ { \
+ std::cout << "Conv size is too large, not enough memory" << std::endl; \
+ return false; \
+ } \
+ im2colGuard.reset(im2colData); \
+ }
+
+ConvolutionLayer::ConvolutionLayer()
+ : _inputData(nullptr), _kernelData(nullptr), _outputData(nullptr), _biasData(nullptr),
+ _inputShape(), _kernelShape(), _outputShape(), _biasShape(), _paddingLeft(0), _paddingTop(0),
+ _paddingRight(0), _paddingBottom(0), _strideWidth(0), _strideHeight(0),
+ _activation(ANEURALNETWORKS_FUSED_NONE), _inputType(OperandType::SCALAR_FLOAT32)
+{
+ // DO NOTHING
+}
+
+bool ConvolutionLayer::convFloat32()
+{
+ ANDROID_NN_CONV_PARAMETERS(float)
+
+ const ::tflite::Dims<4> &kernel_dim = convertShapeToDims(_kernelShape);
+ const int kernel_width = ArraySize(kernel_dim, 1);
+ const int kernel_height = ArraySize(kernel_dim, 2);
+ const bool need_im2col =
+ _strideWidth != 1 || _strideHeight != 1 || kernel_width != 1 || kernel_height != 1;
+
+ float *im2colDataToPass = nullptr;
+ if (need_im2col)
+ {
+ im2colDataToPass = im2colData;
+ }
+
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+ int32_t dilationWidthFactor = 1, dilationHeightFactor = 1;
+ ::tflite::optimized_ops::Conv(
+ reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape),
+ reinterpret_cast<const float *>(_kernelData), convertShapeToDims(_kernelShape),
+ reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape), _strideWidth,
+ _strideHeight, dilationWidthFactor, dilationHeightFactor, paddingWidth, paddingHeight,
+ output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
+ convertShapeToDims(_outputShape), im2colDataToPass, im2colDim);
+ return true;
+}
+
+bool ConvolutionLayer::convQuant8()
+{
+ ANDROID_NN_CONV_PARAMETERS(uint8_t)
+ int32_t inputOffset = -_inputShape.offset;
+ int32_t kernelOffset = -_kernelShape.offset;
+ int32_t outputOffset = _outputShape.offset;
+ float real_multiplier = 0.0;
+ int32_t output_multiplier = 0;
+ int32_t output_shift = 0;
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ if (!GetQuantizedConvolutionMultipler(_inputShape, _kernelShape, _biasShape, _outputShape,
+ &real_multiplier) ||
+ !QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, &output_shift))
+ {
+ return false;
+ }
+ CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
+ &output_activation_max);
+ static gemmlowp::GemmContext gemm_context;
+ // Prevent concurrent executions that may access the scratch buffer and
+ // gemm_context.
+ std::unique_lock<std::mutex> lock(executionMutex);
+  // Allow gemmlowp to decide automatically how many threads to use.
+ gemm_context.set_max_num_threads(0);
+ ::tflite::optimized_ops::Conv(
+ _inputData, convertShapeToDims(_inputShape), inputOffset, _kernelData,
+ convertShapeToDims(_kernelShape), kernelOffset, reinterpret_cast<const int32_t *>(_biasData),
+ convertShapeToDims(_biasShape), _strideWidth, _strideHeight, paddingWidth, paddingHeight,
+ outputOffset, output_multiplier, output_shift, output_activation_min, output_activation_max,
+ _outputData, convertShapeToDims(_outputShape), im2colData, im2colDim, &gemm_context);
+ return true;
+}
+
+void ConvolutionLayer::configure(uint8_t *inputData, const Shape inputShape, uint8_t *kernelData,
+ const Shape kernelShape, uint8_t *biasData, const Shape biasShape,
+ const uint32_t paddingLeft, const uint32_t paddingRight,
+ const uint32_t paddingTop, const uint32_t paddingBottom,
+ const uint32_t strideWidth, const uint32_t strideHeight,
+ const FuseCode activation, uint8_t *outputData,
+ const Shape outputShape)
+{
+ _inputData = inputData;
+ _inputShape = inputShape;
+ _inputType = inputShape.type;
+ _kernelData = kernelData;
+ _kernelShape = kernelShape;
+ _biasData = biasData;
+ _biasShape = biasShape;
+ _paddingLeft = paddingLeft;
+ _paddingRight = paddingRight;
+ _paddingTop = paddingTop;
+ _paddingBottom = paddingBottom;
+ _strideWidth = strideWidth;
+ _strideHeight = strideHeight;
+ _activation = activation;
+ _outputData = outputData;
+ _outputShape = outputShape;
+}
+
+void ConvolutionLayer::run()
+{
+ if (_inputType == OperandType::TENSOR_FLOAT32)
+ {
+ convFloat32();
+ }
+ else if (_inputType == OperandType::TENSOR_QUANT8_ASYMM)
+ {
+ throw std::runtime_error{"ConvolutionLayer : Not tested for TENSOR_QUANT8_ASYMM"};
+ // convQuant8();
+ }
+}
+
+#undef ANDROID_NN_CONV_PARAMETERS
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
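
Note: the `ANDROID_NN_CONV_PARAMETERS` macro sizes an im2col scratch buffer holding one patch of `inDepth * kernelHeight * kernelWidth` elements per output pixel, and only falls back to heap allocation when that exceeds the 1,605,632-byte static buffer. A sketch of the size computation it performs, assumed equivalent, with a hypothetical helper name:

    #include <cstdint>

    // Byte size of the im2col scratch buffer: one row of
    // (in_depth * k_h * k_w) patch elements per output pixel.
    uint64_t im2colBytes(uint64_t elem_size, uint64_t batch, uint64_t out_h,
                         uint64_t out_w, uint64_t in_depth, uint64_t k_h,
                         uint64_t k_w)
    {
      return elem_size * batch * out_h * out_w * in_depth * k_h * k_w;
    }

    // e.g. float, batch 1, 112x112 output, 3x3 kernel over 64 input channels:
    // 4 * 1 * 112 * 112 * 64 * 3 * 3 = 28,901,376 bytes, which exceeds the
    // static buffer, so the heap path is taken.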
diff --git a/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.h b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.h
new file mode 100644
index 000000000..b7afbcec6
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_KERNEL_CPU_CONVOLUTIONLAYER_H__
+#define __NEURUN_KERNEL_CPU_CONVOLUTIONLAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/runtime/IFunction.h>
+
+#include "kernel/cpu/OperationUtils.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+class ConvolutionLayer : public ::arm_compute::IFunction
+{
+public:
+ ConvolutionLayer();
+
+public:
+ bool convFloat32();
+
+ bool convQuant8();
+
+ void configure(uint8_t *inputData, const Shape inputShape, uint8_t *kernelData,
+ const Shape kernelShape, uint8_t *biasData, const Shape biasShape,
+ const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideW, const uint32_t strideH,
+ const FuseCode activation, uint8_t *outputData, const Shape outputShape);
+
+ void run();
+
+private:
+ uint8_t *_inputData;
+ uint8_t *_kernelData;
+ uint8_t *_outputData;
+ uint8_t *_biasData;
+
+ Shape _inputShape;
+ Shape _kernelShape;
+ Shape _outputShape;
+ Shape _biasShape;
+
+ uint32_t _paddingLeft;
+ uint32_t _paddingTop;
+ uint32_t _paddingRight;
+ uint32_t _paddingBottom;
+
+ uint32_t _strideWidth;
+ uint32_t _strideHeight;
+
+ FuseCode _activation;
+
+ OperandType _inputType;
+};
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
+
+#endif // __NEURUN_KERNEL_CPU_CONVOLUTIONLAYER_H__
diff --git a/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc
new file mode 100644
index 000000000..41b9afc0c
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FullyConnectedLayer.h"
+
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"
+#include "kernel/cpu/OperationUtils.h"
+
+#include <mutex>
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+FullyConnectedLayer::FullyConnectedLayer()
+ : _inputData(nullptr), _weightsData(nullptr), _biasData(nullptr), _outputData(nullptr),
+ _inputShape(), _weightsShape(), _biasShape(), _outputShape(),
+ _activation(ANEURALNETWORKS_FUSED_NONE), _inputType(OperandType::SCALAR_FLOAT32)
+{
+ // DO NOTHING
+}
+
+// executionMutex is used to protect concurrent access to non-threadsafe resources
+// like gemmlowp::GemmContext.
+// std::mutex is safe for pthreads on Android.
+static std::mutex executionMutex;
+bool FullyConnectedLayer::fullyConnectedFloat32()
+{
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+  // b/80425683: the optimized implementation produces incorrect results when the
+  // number of input elements is the square of batch_size.
+ uint32_t batch_size = getSizeOfDimension(_outputShape, 0);
+ uint32_t input_n_elements = getNumberOfElements(_inputShape);
+ if (batch_size * batch_size == input_n_elements)
+ {
+ ::tflite::reference_ops::FullyConnected(
+ reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape),
+ reinterpret_cast<const float *>(_weightsData), convertShapeToDims(_weightsShape),
+ reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape),
+ output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
+ convertShapeToDims(_outputShape));
+ }
+ else
+ {
+ ::tflite::optimized_ops::FullyConnected(
+ reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape),
+ reinterpret_cast<const float *>(_weightsData), convertShapeToDims(_weightsShape),
+ reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape),
+ output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
+ convertShapeToDims(_outputShape));
+ }
+ return true;
+}
+
+bool FullyConnectedLayer::fullyConnectedQuant8()
+{
+ int32_t inputOffset = -_inputShape.offset;
+ int32_t weightsOffset = -_weightsShape.offset;
+ int32_t outputOffset = _outputShape.offset;
+ float real_multiplier = 0.0;
+ int32_t output_multiplier = 0;
+ int32_t output_shift = 0;
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+  // Caution: the name 'Convolution' can be misleading here; it is just a math term.
+ if (!GetQuantizedConvolutionMultipler(_inputShape, _weightsShape, _biasShape, _outputShape,
+ &real_multiplier) ||
+ !QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, &output_shift))
+ {
+ return false;
+ }
+ CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
+ &output_activation_max);
+ static gemmlowp::GemmContext gemm_context;
+ // Prevent concurrent executions that access gemm_context.
+ std::unique_lock<std::mutex> lock(executionMutex);
+  // Allow gemmlowp to decide automatically how many threads to use.
+ gemm_context.set_max_num_threads(0);
+ ::tflite::optimized_ops::FullyConnected(
+ _inputData, convertShapeToDims(_inputShape), inputOffset, _weightsData,
+ convertShapeToDims(_weightsShape), weightsOffset,
+ reinterpret_cast<const int32_t *>(_biasData), convertShapeToDims(_biasShape), outputOffset,
+ output_multiplier, output_shift, output_activation_min, output_activation_max, _outputData,
+ convertShapeToDims(_outputShape), &gemm_context);
+ return true;
+}
+
+void FullyConnectedLayer::configure(uint8_t *inputData, const Shape inputShape,
+ uint8_t *weightsData, const Shape weightsShape,
+ uint8_t *biasData, const Shape biasShape, FuseCode activation,
+ uint8_t *outputData, const Shape outputShape)
+{
+ _inputData = inputData;
+ _inputShape = inputShape;
+ _inputType = inputShape.type;
+ _weightsData = weightsData;
+ _weightsShape = weightsShape;
+ _biasData = biasData;
+ _biasShape = biasShape;
+ _activation = activation;
+ _outputData = outputData;
+ _outputShape = outputShape;
+}
+
+void FullyConnectedLayer::run()
+{
+ if (_inputType == OperandType::TENSOR_FLOAT32)
+ {
+ fullyConnectedFloat32();
+ }
+ else if (_inputType == OperandType::TENSOR_QUANT8_ASYMM)
+ {
+ throw std::runtime_error{"FullyConnectedLayer : Not tested for TENSOR_QUANT8_ASYMM"};
+ // fullyConnectedQuant8();
+ }
+}
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
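
Note: the b/80425683 workaround above routes execution to the reference kernel whenever the input element count equals batch_size squared, since the optimized kernel is known to produce incorrect results in exactly that case. A sketch of the dispatch condition, with a hypothetical helper name:

    #include <cstdint>

    // True when the slower-but-correct reference kernel must be used
    // (b/80425683): e.g. input shape [4, 4] -> batch 4, 16 elements.
    bool useReferenceFullyConnected(uint32_t batch_size, uint32_t input_n_elements)
    {
      return batch_size * batch_size == input_n_elements;
    }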
diff --git a/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.h b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.h
new file mode 100644
index 000000000..b1ba172b0
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_KERNEL_CPU_FULLYCONNECTEDLAYER_H__
+#define __NEURUN_KERNEL_CPU_FULLYCONNECTEDLAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/runtime/IFunction.h>
+
+#include "kernel/cpu/OperationUtils.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+class FullyConnectedLayer : public ::arm_compute::IFunction
+{
+public:
+ FullyConnectedLayer();
+
+public:
+ bool fullyConnectedFloat32();
+
+ bool fullyConnectedQuant8();
+
+ void configure(uint8_t *inputData, const Shape inputShape, uint8_t *weightsData,
+ const Shape weightsShape, uint8_t *biasData, const Shape biasShape,
+ FuseCode activation, uint8_t *outputData, const Shape outputShape);
+
+ void run();
+
+private:
+ uint8_t *_inputData;
+ uint8_t *_weightsData;
+ uint8_t *_biasData;
+ uint8_t *_outputData;
+
+ Shape _inputShape;
+ Shape _weightsShape;
+ Shape _biasShape;
+ Shape _outputShape;
+
+ FuseCode _activation;
+
+ OperandType _inputType;
+};
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
+
+#endif // __NEURUN_KERNEL_CPU_FULLYCONNECTEDLAYER_H__
diff --git a/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc b/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc
new file mode 100644
index 000000000..3d96bb401
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "MaxPoolLayer.h"
+
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "kernel/cpu/OperationUtils.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+#define MAXPOOLING_PARAMETERS \
+ uint32_t height = getSizeOfDimension(_inputShape, 1); \
+ uint32_t width = getSizeOfDimension(_inputShape, 2); \
+ uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \
+ uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \
+ \
+ uint32_t paddingHeight = (uint32_t)_paddingTop; \
+ uint32_t paddingWidth = (uint32_t)_paddingLeft;
+
+MaxPoolLayer::MaxPoolLayer()
+ : _inputData(nullptr), _outputData(nullptr), _inputShape(), _outputShape(), _paddingLeft(0),
+ _paddingTop(0), _paddingRight(0), _paddingBottom(0), _strideWidth(0), _strideHeight(0),
+ _kernelWidth(0), _kernelHeight(0), _activation(ANEURALNETWORKS_FUSED_NONE),
+ _inputType(OperandType::SCALAR_FLOAT32)
+{
+ // DO NOTHING
+}
+
+bool MaxPoolLayer::maxPoolFloat32()
+{
+
+ MAXPOOLING_PARAMETERS
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+
+ ::tflite::optimized_ops::MaxPool(
+ reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape), _strideWidth,
+ _strideHeight, paddingWidth, paddingHeight, _kernelWidth, _kernelHeight,
+ output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
+ convertShapeToDims(_outputShape));
+ return true;
+}
+bool MaxPoolLayer::maxPoolQuant8()
+{
+
+ MAXPOOLING_PARAMETERS
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
+ &output_activation_max);
+
+ ::tflite::optimized_ops::MaxPool(_inputData, convertShapeToDims(_inputShape), _strideWidth,
+ _strideHeight, paddingWidth, paddingHeight, _kernelWidth,
+ _kernelHeight, output_activation_min, output_activation_max,
+ _outputData, convertShapeToDims(_outputShape));
+ return true;
+}
+
+void MaxPoolLayer::configure(uint8_t *inputData, const Shape inputShape, const uint32_t paddingLeft,
+ const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideWidth,
+ const uint32_t strideHeight, const uint32_t kernelWidth,
+ const uint32_t kernelHeight, const FuseCode activation,
+ uint8_t *outputData, const Shape outputShape)
+{
+ _inputData = inputData;
+
+ _inputShape = inputShape;
+ _inputType = inputShape.type;
+ _paddingLeft = paddingLeft;
+ _paddingRight = paddingRight;
+ _paddingTop = paddingTop;
+ _paddingBottom = paddingBottom;
+ _strideWidth = strideWidth;
+ _strideHeight = strideHeight;
+ _kernelWidth = kernelWidth;
+ _kernelHeight = kernelHeight;
+ _activation = activation;
+ _outputData = outputData;
+ _outputShape = outputShape;
+}
+
+void MaxPoolLayer::run()
+{
+ if (_inputType == OperandType::TENSOR_FLOAT32)
+ {
+ maxPoolFloat32();
+ }
+ else if (_inputType == OperandType::TENSOR_QUANT8_ASYMM)
+ {
+ throw std::runtime_error{"MaxPoolLayer : Not tested for TENSOR_QUANT8_ASYMM"};
+ // maxPoolQuant8();
+ }
+}
+
+#undef MAXPOOLING_PARAMETERS
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
diff --git a/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.h b/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.h
new file mode 100644
index 000000000..b42efb9f6
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_KERNEL_CPU_MAXPOOLLAYER_H__
+#define __NEURUN_KERNEL_CPU_MAXPOOLLAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/runtime/IFunction.h>
+
+#include "kernel/cpu/OperationUtils.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+class MaxPoolLayer : public ::arm_compute::IFunction
+{
+public:
+ MaxPoolLayer();
+
+public:
+ bool maxPoolFloat32();
+
+ bool maxPoolQuant8();
+
+ void configure(uint8_t *inputData, const Shape inputShape, const uint32_t paddingLeft,
+ const uint32_t paddingRight, const uint32_t paddingTop,
+ const uint32_t paddingBottom, const uint32_t strideWidth,
+ const uint32_t strideHeight, const uint32_t kernelWidth,
+ const uint32_t kernelHeight, const FuseCode activation, uint8_t *outputData,
+ const Shape outputShape);
+
+ void run();
+
+private:
+ uint8_t *_inputData;
+ uint8_t *_outputData;
+
+ Shape _inputShape;
+ Shape _outputShape;
+
+ uint32_t _paddingLeft;
+ uint32_t _paddingTop;
+ uint32_t _paddingRight;
+ uint32_t _paddingBottom;
+
+ uint32_t _strideWidth;
+ uint32_t _strideHeight;
+ uint32_t _kernelWidth;
+ uint32_t _kernelHeight;
+
+ FuseCode _activation;
+
+ OperandType _inputType;
+};
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
+
+#endif // __NEURUN_KERNEL_CPU_MAXPOOLLAYER_H__
diff --git a/runtimes/neurun/src/kernel/cpu/OperationUtils.cc b/runtimes/neurun/src/kernel/cpu/OperationUtils.cc
new file mode 100644
index 000000000..5ec2f8e62
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/OperationUtils.cc
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel/cpu/OperationUtils.h"
+
+#include <cmath>
+#include <algorithm>
+#include <cassert>
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+uint32_t getNumberOfDimensions(const Shape &shape) { return shape.dimensions.size(); }
+
+uint32_t getNumberOfElements(const Shape &shape)
+{
+ uint32_t count = 1;
+ for (size_t i = 0; i < shape.dimensions.size(); i++)
+ {
+ count *= shape.dimensions[i];
+ }
+ return count;
+}
+
+uint32_t getSizeOfDimension(const Shape &shape, uint32_t dimensionIdx)
+{
+ if (dimensionIdx >= shape.dimensions.size())
+ {
+    // TODO: log the error
+ return 0;
+ }
+ return shape.dimensions[dimensionIdx];
+}
+
+bool QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier,
+ int32_t *right_shift)
+{
+ assert(double_multiplier >= 0.);
+ assert(double_multiplier < 1.);
+ if (double_multiplier == 0.)
+ {
+ *quantized_multiplier = 0;
+ *right_shift = 0;
+ return true;
+ }
+ assert(double_multiplier > 0.);
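+  // Decompose the multiplier as q * 2^(-right_shift) with q in [0.5, 1), then
+  // round q to a Q31 fixed-point integer, so the multiplier is approximated
+  // as quantized_multiplier * 2^(-31 - right_shift).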
+ const double q = std::frexp(double_multiplier, right_shift);
+ *right_shift *= -1;
+ int64_t q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
+ assert(q_fixed <= (1ll << 31));
+ if (q_fixed == (1ll << 31))
+ {
+ q_fixed /= 2;
+ --*right_shift;
+ }
+ assert(*right_shift >= 0);
+ assert(q_fixed <= std::numeric_limits<int32_t>::max());
+ *quantized_multiplier = static_cast<int32_t>(q_fixed);
+ return true;
+}
+
+bool GetQuantizedConvolutionMultipler(const Shape &inputShape, const Shape &filterShape,
+ const Shape &biasShape, const Shape &outputShape,
+ float *multiplier)
+{
+ const float input_product_scale = inputShape.scale * filterShape.scale;
+ const float bias_scale = biasShape.scale;
+ const float output_scale = outputShape.scale;
+ // The following conditions must be guaranteed by the training pipeline.
+ assert(std::abs(input_product_scale - bias_scale) <=
+ 1e-6 * std::min(input_product_scale, bias_scale));
+ assert(input_product_scale >= 0);
+ assert(input_product_scale < output_scale);
+ *multiplier = input_product_scale / output_scale;
+ return true;
+}
+
+bool QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier,
+ int *left_shift)
+{
+ assert(double_multiplier > 1.);
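+  // Same Q31 decomposition as the smaller-than-one case, except the positive
+  // exponent from std::frexp becomes a left shift applied after the multiply.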
+ const double q = std::frexp(double_multiplier, left_shift);
+ int64_t q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
+ assert(q_fixed <= (1ll << 31));
+ if (q_fixed == (1ll << 31))
+ {
+ q_fixed /= 2;
+ ++*left_shift;
+ }
+ assert(*left_shift >= 0);
+ assert(q_fixed <= std::numeric_limits<int32_t>::max());
+ *quantized_multiplier = static_cast<int32_t>(q_fixed);
+ return true;
+}
+
+void CalculateActivationRangeFloat(int32_t activation, float *activation_min, float *activation_max)
+{
+ if (activation == ANEURALNETWORKS_FUSED_RELU)
+ {
+ *activation_min = 0.f;
+ *activation_max = std::numeric_limits<float>::max();
+ }
+ else if (activation == ANEURALNETWORKS_FUSED_RELU6)
+ {
+ *activation_min = 0.f;
+ *activation_max = 6.f;
+ }
+ else if (activation == ANEURALNETWORKS_FUSED_RELU1)
+ {
+ *activation_min = -1.f;
+ *activation_max = 1.f;
+ }
+ else if (activation == ANEURALNETWORKS_FUSED_NONE)
+ {
+ *activation_min = std::numeric_limits<float>::lowest();
+ *activation_max = std::numeric_limits<float>::max();
+ }
+ else
+ {
+ std::cout << "Unsupported fused activation function." << std::endl;
+ }
+}
+
+void CalculateActivationRangeUint8(int32_t activation, const Shape &outputShape, int32_t *act_min,
+ int32_t *act_max)
+{
+ const int32_t qmin = std::numeric_limits<uint8_t>::min();
+ const int32_t qmax = std::numeric_limits<uint8_t>::max();
+ const auto scale = outputShape.scale;
+ const auto zero_point = outputShape.offset;
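+  // Map a real-valued activation bound into the quantized domain of the output.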
+ auto quantize = [scale, zero_point](float f) {
+ return zero_point + static_cast<int32_t>(std::round(f / scale));
+ };
+ if (activation == ANEURALNETWORKS_FUSED_RELU)
+ {
+ *act_min = std::max(qmin, quantize(0.0));
+ *act_max = qmax;
+ }
+ else if (activation == ANEURALNETWORKS_FUSED_RELU6)
+ {
+ *act_min = std::max(qmin, quantize(0.0));
+ *act_max = std::min(qmax, quantize(6.0));
+ }
+ else if (activation == ANEURALNETWORKS_FUSED_RELU1)
+ {
+ *act_min = std::max(qmin, quantize(-1.0));
+ *act_max = std::min(qmax, quantize(1.0));
+ }
+ else if (activation == ANEURALNETWORKS_FUSED_NONE)
+ {
+ *act_min = qmin;
+ *act_max = qmax;
+ }
+ else
+ {
+ std::cout << "Unsupported fused activation function." << std::endl;
+ }
+}
+
+int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift)
+{
+ const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) *
+ (1ll << (31 - input_integer_bits)) / (1ll << input_left_shift);
+ // Tighten bound using floor. Suppose that we could use the exact value.
+ // After scaling the difference, the result would be at the maximum. Thus we
+ // must ensure that our value has lower magnitude.
+ return static_cast<int32_t>(std::floor(max_input_rescaled));
+}
+
+Shape getShape(const ::neurun::graph::operand::Object &o)
+{
+ Shape shape;
+
+ shape.type = static_cast<OperandType>(static_cast<int32_t>(o.typeInfo().type()));
+ shape.dimensions = std::vector<uint32_t>(o.shape().dims().begin(), o.shape().dims().end());
+ shape.scale = o.typeInfo().scale();
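+  // NOTE shape.offset (the quantization zero point) is not propagated yet.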
+ // shape.offset = _offset;
+
+ return shape;
+}
+
+size_t sizeOfData(OperandType type, const std::vector<uint32_t> &dimensions)
+{
+ size_t size = 4;
+
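+  // Element size in bytes: 32-bit scalar/tensor types use 4, asymmetric
+  // quantized 8-bit tensors use 1.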
+ switch (type)
+ {
+ case OperandType::SCALAR_FLOAT32:
+ case OperandType::SCALAR_INT32:
+ case OperandType::SCALAR_UINT32:
+ case OperandType::TENSOR_FLOAT32:
+ case OperandType::TENSOR_INT32:
+ size = 4;
+ break;
+ case OperandType::TENSOR_QUANT8_ASYMM:
+ size = 1;
+ break;
+ default:
+ throw std::runtime_error("Not supported operand type.");
+ break;
+ }
+
+ for (auto d : dimensions)
+ {
+ size *= d;
+ }
+
+ return size;
+}
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
diff --git a/runtimes/neurun/src/kernel/cpu/OperationUtils.h b/runtimes/neurun/src/kernel/cpu/OperationUtils.h
new file mode 100644
index 000000000..5914d04e3
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/OperationUtils.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_KERNEL_CPU_OPERATION_UTILS_H__
+#define __NEURUN_KERNEL_CPU_OPERATION_UTILS_H__
+
+#include <NeuralNetworks.h>
+
+#include <iostream>
+#include <limits>
+#include <vector>
+
+#include "tensorflow/contrib/lite/kernels/internal/types.h"
+#include "graph/operand/Object.h"
+#include "graph/operand/DataType.h"
+
+using OperandType = neurun::graph::operand::DataType;
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+struct Shape
+{
+ OperandType type;
+ std::vector<uint32_t> dimensions;
+ float scale;
+ int32_t offset;
+};
+
+uint32_t getNumberOfDimensions(const Shape &shape);
+
+uint32_t getNumberOfElements(const Shape &shape);
+
+uint32_t getSizeOfDimension(const Shape &shape, uint32_t dimensionIdx);
+
+inline ::tflite::Dims<4> convertShapeToDims(const Shape &shape)
+{
+ // nnAssert(shape.dimensions.size() <= 4);
+ ::tflite::Dims<4> dims;
+ // The dimensions are reversed in Dims<4>.
+ for (int i = 0; i < 4; ++i)
+ {
+ int src = static_cast<int>(shape.dimensions.size()) - i - 1;
+ if (src >= 0)
+ {
+ dims.sizes[i] = static_cast<int>(getSizeOfDimension(shape, src));
+ }
+ else
+ {
+ dims.sizes[i] = 1;
+ }
+ }
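+  // Dense strides: each stride is the product of all lower-dimension sizes.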
+ dims.strides[0] = 1;
+ for (int i = 1; i < 4; i++)
+ {
+ dims.strides[i] = dims.strides[i - 1] * dims.sizes[i - 1];
+ }
+ return dims;
+}
+
+__wur bool QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier,
+ int32_t *right_shift);
+
+__wur bool GetQuantizedConvolutionMultipler(const Shape &inputShape, const Shape &filterShape,
+ const Shape &biasShape, const Shape &outputShape,
+ float *multiplier);
+__wur bool QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t *quantized_multiplier,
+ int *left_shift);
+
+void CalculateActivationRangeFloat(int32_t activation, float *activation_min,
+ float *activation_max);
+
+void CalculateActivationRangeUint8(int32_t activation, const Shape &outputShape, int32_t *act_min,
+ int32_t *act_max);
+
+int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift);
+
+Shape getShape(const ::neurun::graph::operand::Object &o);
+
+size_t sizeOfData(OperandType type, const std::vector<uint32_t> &dimensions);
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
+
+#endif // __NEURUN_KERNEL_CPU_OPERATION_UTILS_H__
diff --git a/runtimes/neurun/src/kernel/cpu/ReshapeLayer.cc b/runtimes/neurun/src/kernel/cpu/ReshapeLayer.cc
new file mode 100644
index 000000000..377f783e0
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/ReshapeLayer.cc
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ReshapeLayer.h"
+
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"
+#include "kernel/cpu/OperationUtils.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+ReshapeLayer::ReshapeLayer()
+ : _inputData(nullptr), _outputData(nullptr), _inputShape(), _outputShape()
+{
+ // DO NOTHING
+}
+
+bool ReshapeLayer::reshapeGeneric()
+{
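+  // Reshape never moves elements; input and output cover the same number of
+  // bytes, so a raw copy is sufficient.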
+ size_t count = sizeOfData(_inputShape.type, _inputShape.dimensions);
+ memcpy(reinterpret_cast<void *>(_outputData), reinterpret_cast<const void *>(_inputData), count);
+ return true;
+}
+
+void ReshapeLayer::configure(uint8_t *inputData, const Shape &inputShape, uint8_t *outputData,
+ const Shape &outputShape)
+{
+ _inputData = inputData;
+ _inputShape = inputShape;
+ _outputData = outputData;
+ _outputShape = outputShape;
+}
+
+void ReshapeLayer::run() { reshapeGeneric(); }
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
diff --git a/runtimes/neurun/src/kernel/cpu/ReshapeLayer.h b/runtimes/neurun/src/kernel/cpu/ReshapeLayer.h
new file mode 100644
index 000000000..395cc1d7f
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/ReshapeLayer.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_KERNEL_CPU_RESHAPELAYER_H__
+#define __NEURUN_KERNEL_CPU_RESHAPELAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/runtime/IFunction.h>
+
+#include "kernel/cpu/OperationUtils.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+class ReshapeLayer : public ::arm_compute::IFunction
+{
+public:
+ ReshapeLayer();
+
+public:
+ bool reshapeGeneric();
+
+ void configure(uint8_t *inputData, const Shape &inputShape, uint8_t *outputData,
+ const Shape &outputShape);
+
+  void run() override;
+
+private:
+ uint8_t *_inputData;
+ uint8_t *_outputData;
+
+ Shape _inputShape;
+ Shape _outputShape;
+};
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
+
+#endif // __NEURUN_KERNEL_CPU_RESHAPELAYER_H__
diff --git a/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc b/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc
new file mode 100644
index 000000000..4f5a69f2e
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "SoftMaxLayer.h"
+
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "kernel/cpu/OperationUtils.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+SoftMaxLayer::SoftMaxLayer()
+ : _inputData(nullptr), _outputData(nullptr), _beta(0.0), _inputShape(), _outputShape(),
+ _inputType(OperandType::SCALAR_FLOAT32)
+{
+ // DO NOTHING
+}
+
+bool SoftMaxLayer::softmaxFloat32()
+{
+ ::tflite::Dims<4> dim;
+ if (getNumberOfDimensions(_inputShape) == 2)
+ {
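+    // Promote the 2D (batch, input) tensor to the 4D shape the tflite kernel expects.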
+ uint32_t batch_size = getSizeOfDimension(_inputShape, 0);
+ uint32_t input_size = getNumberOfElements(_inputShape) / batch_size;
+ Shape shapeIn4D;
+ shapeIn4D.dimensions = {batch_size, 1, 1, input_size};
+ dim = convertShapeToDims(shapeIn4D);
+ }
+ else if (getNumberOfDimensions(_inputShape) == 4)
+ {
+ dim = convertShapeToDims(_inputShape);
+ }
+ else
+ {
+ std::cout << "only 2D and 4D tensors supported" << std::endl;
+ return false;
+ }
+ ::tflite::optimized_ops::Softmax(reinterpret_cast<const float *>(_inputData), dim, _beta,
+ reinterpret_cast<float *>(_outputData), dim);
+ return true;
+}
+
+bool SoftMaxLayer::softmaxQuant8()
+{
+ ::tflite::Dims<4> dim;
+ if (getNumberOfDimensions(_inputShape) == 2)
+ {
+ uint32_t batch_size = getSizeOfDimension(_inputShape, 0);
+ uint32_t input_size = getNumberOfElements(_inputShape) / batch_size;
+ Shape shapeIn4D;
+ shapeIn4D.dimensions = {batch_size, 1, 1, input_size};
+ dim = convertShapeToDims(shapeIn4D);
+ }
+ else if (getNumberOfDimensions(_inputShape) == 4)
+ {
+ dim = convertShapeToDims(_inputShape);
+ }
+ else
+ {
+ std::cout << "only 2D and 4D tensors supported" << std::endl;
+ return false;
+ }
+ if (_outputShape.offset != 0 || _outputShape.scale != 1.f / 256)
+ {
+ std::cout << "incorrect scale / offset for output" << std::endl;
+ return false;
+ }
+ static const int32_t kScaledDiffIntegerBits = 5;
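+  // Fold beta and the input scale into one fixed-point multiplier, clamped so
+  // it still fits in a signed 32-bit value.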
+ const double input_beta_real_multiplier = std::min(
+ 1.0 * _beta * _inputShape.scale * (1 << (31 - kScaledDiffIntegerBits)), (1ll << 31) - 1.0);
+ int32_t input_multiplier = 0;
+ int32_t input_left_shift = 0;
+ if (!QuantizeMultiplierGreaterThanOne(input_beta_real_multiplier, &input_multiplier,
+ &input_left_shift))
+ {
+ return false;
+ }
+ float diff_min = -1.0f * CalculateInputRadius(kScaledDiffIntegerBits, input_left_shift);
+ ::tflite::optimized_ops::Softmax(_inputData, dim, input_multiplier, input_left_shift, diff_min,
+ _outputData, dim);
+ return true;
+}
+
+void SoftMaxLayer::configure(uint8_t *inputData, const Shape &inputShape, const float beta,
+ uint8_t *outputData, const Shape &outputShape)
+{
+ _inputData = inputData;
+ _inputShape = inputShape;
+ _inputType = inputShape.type;
+ _outputData = outputData;
+ _outputShape = outputShape;
+ _beta = beta;
+}
+
+void SoftMaxLayer::run()
+{
+ if (_inputType == OperandType::TENSOR_FLOAT32)
+ {
+ softmaxFloat32();
+ }
+ else if (_inputType == OperandType::TENSOR_QUANT8_ASYMM)
+ {
+ throw std::runtime_error{"SoftMaxLayer : Not tested for TENSOR_QUANT8_ASYMM"};
+ // softmaxQuant8();
+ }
+}
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
diff --git a/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.h b/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.h
new file mode 100644
index 000000000..8057be52f
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEURUN_KERNEL_CPU_SOFTMAXLAYER_H__
+#define __NEURUN_KERNEL_CPU_SOFTMAXLAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/runtime/IFunction.h>
+
+#include "kernel/cpu/OperationUtils.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+class SoftMaxLayer : public ::arm_compute::IFunction
+{
+public:
+ SoftMaxLayer();
+
+public:
+ bool softmaxFloat32();
+
+ bool softmaxQuant8();
+
+ void configure(uint8_t *inputData, const Shape &inputShape, const float beta, uint8_t *outputData,
+ const Shape &outputShape);
+
+  void run() override;
+
+private:
+ uint8_t *_inputData;
+ uint8_t *_outputData;
+
+ float _beta;
+
+ Shape _inputShape;
+ Shape _outputShape;
+
+ OperandType _inputType;
+};
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
+
+#endif // __NEURUN_KERNEL_CPU_SOFTMAXLAYER_H__
diff --git a/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.cc b/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.cc
new file mode 100644
index 000000000..00e914732
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.cc
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
+//
+
+#if 0
+
+#include "TensorConvertFromCommonLayer.h"
+
+#include "internal/nnapi/feature/Reader.h"
+#include "internal/nnapi/feature/View.h"
+
+#include <util/feature/IndexIterator.h>
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+bool TensorConvertFromCommonLayer::convert()
+{
+ auto inputBuffer = _inputTensor->buffer();
+ auto inputSize = _inputTensor->info()->total_size();
+
+ auto outputBuffer = _outputTensor->buffer();
+ auto outputSize = _outputTensor->info()->total_size();
+
+ if (_tensorShape.rank() == 2)
+ {
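+    // Rank-2: copy element by element along the second dimension.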
+ const auto len = _tensorShape.dim(1);
+
+ auto base = reinterpret_cast<const float *>(inputBuffer);
+
+ for (int32_t n = 0; n < len; ++n)
+ {
+ auto from = base + n;
+ auto into =
+ reinterpret_cast<float *>(_outputTensor->ptr_to_element(::arm_compute::Coordinates{n}));
+
+ *into = *from;
+ }
+ }
+ else if (_tensorShape.rank() == 4)
+ {
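+    // Rank-4: copy through the feature reader/view helpers, which translate
+    // between the common layout and the backend layout.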
+ auto featureShape = _tensorShape.asFeature();
+
+ const ::internal::nnapi::feature::Reader<float> from{featureShape, inputBuffer, inputSize};
+ ::internal::nnapi::feature::View<float> into{featureShape, outputBuffer, outputSize};
+
+ ::nnfw::util::feature::iterate(featureShape)
+ << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, ch, row, col);
+ into.at(batch, ch, row, col) = value;
+ };
+  }
+
+  return true;
+}
+
+void TensorConvertFromCommonLayer::configure(::internal::common::Tensor *inputTensor,
+ ::internal::cpu::Tensor *outputTensor,
+ const Shape &tensorShape)
+{
+ _inputTensor = inputTensor;
+ _outputTensor = outputTensor;
+ _tensorShape = tensorShape;
+}
+
+void TensorConvertFromCommonLayer::run() { convert(); }
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
+
+#endif
diff --git a/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.h b/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.h
new file mode 100644
index 000000000..56f7bcf32
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/TensorConvertFromCommonLayer.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
+//
+
+#if 0
+
+#ifndef __NEURUN_KERNEL_CPU_TENSOR_CONVERT_FROM_COMMON_LAYER_H__
+#define __NEURUN_KERNEL_CPU_TENSOR_CONVERT_FROM_COMMON_LAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/runtime/IFunction.h>
+
+#include "internal/Model.h"
+#include "internal/common/Tensor.h"
+#include "internal/cpu.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+class TensorConvertFromCommonLayer : public ::arm_compute::IFunction
+{
+public:
+ TensorConvertFromCommonLayer() {}
+
+public:
+ bool convert();
+
+ void configure(::internal::common::Tensor *inputTensor, ::internal::cpu::Tensor *outputTensor,
+ const Shape &tensorShape);
+
+ void run();
+
+private:
+ ::internal::common::Tensor *_inputTensor;
+ ::internal::cpu::Tensor *_outputTensor;
+
+ Shape _tensorShape{1};
+};
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
+
+#endif // __NEURUN_KERNEL_CPU_TENSOR_CONVERT_FROM_COMMON_LAYER_H__
+
+#endif
diff --git a/runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.cc b/runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.cc
new file mode 100644
index 000000000..7d721f494
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.cc
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
+//
+
+#if 0
+
+#include "TensorConvertToCommonLayer.h"
+
+#include "internal/nnapi/feature/Reader.h"
+#include "internal/nnapi/feature/View.h"
+
+#include <util/feature/IndexIterator.h>
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+bool TensorConvertToCommonLayer::convert()
+{
+ auto inputBuffer = _inputTensor->buffer();
+ auto inputSize = _inputTensor->info()->total_size();
+
+ auto outputBuffer = _outputTensor->buffer();
+ auto outputSize = _outputTensor->info()->total_size();
+
+ if (_tensorShape.rank() == 2)
+ {
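+    // Same element-wise copy as the FromCommon direction, with source and
+    // destination roles swapped.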
+ const auto len = _tensorShape.dim(1);
+
+ auto base = reinterpret_cast<float *>(outputBuffer);
+
+ for (int32_t n = 0; n < len; ++n)
+ {
+ auto from = reinterpret_cast<const float *>(
+ _inputTensor->ptr_to_element(::arm_compute::Coordinates{n}));
+ auto into = base + n;
+
+ *into = *from;
+ }
+ }
+ else if (_tensorShape.rank() == 4)
+ {
+ auto featureShape = _tensorShape.asFeature();
+
+ const ::internal::nnapi::feature::Reader<float> from{featureShape, inputBuffer, inputSize};
+ ::internal::nnapi::feature::View<float> into{featureShape, outputBuffer, outputSize};
+
+ ::nnfw::util::feature::iterate(featureShape)
+ << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, ch, row, col);
+ into.at(batch, ch, row, col) = value;
+ };
+  }
+
+  return true;
+}
+
+void TensorConvertToCommonLayer::configure(::internal::cpu::Tensor *inputTensor,
+ ::internal::common::Tensor *outputTensor,
+ const Shape &tensorShape)
+{
+ _inputTensor = inputTensor;
+ _outputTensor = outputTensor;
+ _tensorShape = tensorShape;
+}
+
+void TensorConvertToCommonLayer::run() { convert(); }
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
+
+#endif
diff --git a/runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.h b/runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.h
new file mode 100644
index 000000000..7e96d1aff
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/TensorConvertToCommonLayer.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// THIS FILE IS UNUSED BUT LEFT FOR FUTURE REFERENCE
+//
+
+#if 0
+
+#ifndef __NEURUN_KERNEL_CPU_TENSOR_CONVERT_TO_COMMON_LAYER_H__
+#define __NEURUN_KERNEL_CPU_TENSOR_CONVERT_TO_COMMON_LAYER_H__
+
+#include <NeuralNetworks.h>
+
+#include <arm_compute/runtime/IFunction.h>
+
+#include "internal/Model.h"
+#include "internal/common/Tensor.h"
+#include "internal/cpu.h"
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+class TensorConvertToCommonLayer : public ::arm_compute::IFunction
+{
+public:
+ TensorConvertToCommonLayer() {}
+
+public:
+ bool convert();
+
+ void configure(::internal::cpu::Tensor *inputTensor, ::internal::common::Tensor *outputTensor,
+ const Shape &tensorShape);
+
+ void run();
+
+private:
+ ::internal::cpu::Tensor *_inputTensor;
+ ::internal::common::Tensor *_outputTensor;
+
+ Shape _tensorShape{1};
+};
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
+
+#endif // __NEURUN_KERNEL_CPU_TENSOR_CONVERT_TO_COMMON_LAYER_H__
+
+#endif