diff options
Diffstat (limited to 'runtimes/pure_arm_compute/src')
164 files changed, 12047 insertions, 1703 deletions
diff --git a/runtimes/pure_arm_compute/src/compilation.cc b/runtimes/pure_arm_compute/src/compilation.cc index 2488332a6..bed42529b 100644 --- a/runtimes/pure_arm_compute/src/compilation.cc +++ b/runtimes/pure_arm_compute/src/compilation.cc @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file compilation.cc + * @brief This file contains ANeuralNetworksCompilation APIs and related classes + * @ingroup COM_AI_RUNTIME + */ + #include <NeuralNetworks.h> // For CLKernelLibraryEx initialization @@ -25,28 +31,42 @@ #include <arm_compute/runtime/CL/CLScheduler.h> #include <arm_compute/runtime/CL/CLSubTensor.h> #include <arm_compute/runtime/CL/functions/CLArithmeticAddition.h> -#include <arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h> +#include <arm_compute/runtime/CL/functions/CLArithmeticSubtractionEx.h> +#include <arm_compute/runtime/CL/functions/CLPadLayerEx.h> #include <arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h> #include <arm_compute/runtime/CL/functions/CLPixelWiseDivision.h> #include <arm_compute/runtime/CL/functions/CLPoolingLayer.h> #include <arm_compute/runtime/CL/functions/CLActivationLayer.h> +#include <arm_compute/runtime/CL/functions/CLActivationLayerEx.h> #include <arm_compute/runtime/CL/functions/CLScale.h> +#include <arm_compute/runtime/CL/functions/CLSpaceToBatchND.h> +#include <arm_compute/runtime/CL/functions/CLSpaceToDepth.h> #include <arm_compute/runtime/CL/functions/CLReshapeLayer.h> -#include <arm_compute/runtime/CL/functions/CLStridedSlice.h> +#include <arm_compute/runtime/CL/functions/CLStridedSliceEx.h> #include <arm_compute/runtime/CL/functions/CLSoftmaxLayer.h> #include <arm_compute/runtime/CL/functions/CLGather.h> +#include <arm_compute/runtime/CL/functions/CLHashtableLookup.h> #include <arm_compute/runtime/CL/functions/CLTopKV2.h> -#include <arm_compute/runtime/CL/functions/CLReduceMax.h> +#include <arm_compute/runtime/CL/functions/CLArgMinMax.h> #include 
<arm_compute/runtime/CL/functions/CLCast.h> #include <arm_compute/runtime/CL/functions/CLConvolutionLayer.h> #include <arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h> #include <arm_compute/runtime/CL/functions/CLDequantizationLayer.h> -#include <arm_compute/runtime/CL/functions/CLReductionMean.h> -#include <arm_compute/runtime/CL/functions/CLTranspose.h> +#include <arm_compute/runtime/CL/functions/CLDepthToSpace.h> +#include <arm_compute/runtime/CL/functions/CLPermuteEx.h> +#include <arm_compute/runtime/CL/functions/CLReduceOperation.h> #include <arm_compute/runtime/CL/functions/CLRNNLayer.h> #include <arm_compute/runtime/CL/functions/CLFloor.h> #include <arm_compute/runtime/CL/functions/CLCopy.h> -#include <arm_compute/runtime/CL/functions/CLNormalizationLayer.h> +#include <arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h> +#include <arm_compute/runtime/CL/functions/CLExp.h> +#include <arm_compute/runtime/CL/functions/CLBatchToSpaceND.h> +#include <arm_compute/runtime/CL/functions/CLEmbeddingLookup.h> +#include <arm_compute/runtime/CL/functions/CLSquaredDifference.h> +#include <arm_compute/runtime/CL/functions/CLNeg.h> +#include <arm_compute/runtime/CL/functions/CLPReLU.h> +#include <arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h> +#include <arm_compute/runtime/CL/functions/CLComparisonOp.h> #include <arm_compute/runtime/SubTensor.h> #include <arm_compute/runtime/NEON/functions/NESoftmaxLayer.h> @@ -58,7 +78,7 @@ #include <arm_compute/runtime/NEON/functions/NEConvolutionLayer.h> #include <arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h> #include <arm_compute/runtime/NEON/functions/NEFloor.h> -#include <arm_compute/runtime/NEON/functions/NENormalizationLayer.h> +#include <arm_compute/runtime/NEON/functions/NENormalizationLayerEx.h> #include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h> #include "internal/arm_compute.h" @@ -74,19 +94,28 @@ #include "internal/arm_compute/tensor/View.h" #include 
"internal/layers/GenericReshapeLayer.h" #include "internal/layers/SimpleArithmeticAddition.h" +#include "internal/layers/SimplePadLayer.h" #include "internal/layers/SimpleCastLayer.h" +#include "internal/layers/SimpleTransposeConv.h" #include "internal/layers/GenericFullyConnectedLayer.h" -#include "internal/layers/PadLayer.h" #include "internal/layers/SimpleSpaceToDepth.h" #include "internal/layers/SimpleEmbeddingLookup.h" -#include "internal/layers/SquaredDifferenceOperation.h" - -#include "util/matrix/IndexIterator.h" -#include "util/kernel/IndexIterator.h" -#include "util/feature/IndexIterator.h" -#include "util/tensor/IndexIterator.h" - -#include <nnfw/std/memory.h> +#include "internal/layers/SimpleDepthToSpace.h" +#include "internal/layers/SimpleBatchToSpaceNd.h" +#include "internal/layers/SimpleHashtableLookupLayer.h" +#include "internal/layers/SimplePackLayer.h" +#include "internal/layers/SimpleSpaceToBatchND.h" +#include "internal/layers/SimpleNeg.h" +#include "internal/layers/SimpleUnpackLayer.h" +#include "internal/layers/SimpleSQRT.h" +#include "internal/layers/SimpleArgMinMax.h" + +#include "misc/matrix/IndexIterator.h" +#include "misc/kernel/IndexIterator.h" +#include "misc/feature/IndexIterator.h" +#include "misc/tensor/IndexIterator.h" + +#include <cpp14/memory.h> #include "compilation.h" #include "model.h" @@ -154,8 +183,7 @@ Padding valid_padding(void) return padding; } -Padding same_padding(const nnfw::util::feature::Shape &ifm_shape, - const nnfw::util::feature::Shape &ofm_shape, const Stride &stride, uint32_t kw, +Padding same_padding(const nnfw::misc::feature::Shape &ifm_shape, const Stride &stride, uint32_t kw, uint32_t kh) { Padding padding; @@ -164,13 +192,16 @@ Padding same_padding(const nnfw::util::feature::Shape &ifm_shape, // // SAME padding. Padding on both ends are the "same": // - // padding_to_beginning = total_padding / 2 - // padding_to_end = (total_padding + 1)/2. 
+ // padding_to_beginning = total_padding / 2 + // padding_to_end = (total_padding + 1)/2. // - const int32_t vertical_needed_input = (ofm_shape.H - 1) * stride.vertical + kh; + const int32_t out_size_height = (ifm_shape.H + stride.vertical - 1) / stride.vertical; + const int32_t out_size_width = (ifm_shape.W + stride.horizontal - 1) / stride.horizontal; + + const int32_t vertical_needed_input = (out_size_height - 1) * stride.vertical + kh; const int32_t vertical_total_padding = std::max(0, vertical_needed_input - ifm_shape.H); - const int32_t horizontal_needed_input = (ofm_shape.W - 1) * stride.horizontal + kw; + const int32_t horizontal_needed_input = (out_size_width - 1) * stride.horizontal + kw; const int32_t horizontal_total_padding = std::max(0, horizontal_needed_input - ifm_shape.W); padding.top = vertical_total_padding / 2; @@ -208,14 +239,14 @@ using namespace std::placeholders; template <typename T> static void initFeatureTensor(::arm_compute::ITensor &tensor, - const nnfw::util::feature::Shape &feature_shape, + const nnfw::misc::feature::Shape &feature_shape, const uint8_t *feature_base, const size_t feature_size) { const ::internal::nnapi::feature::Reader<T> from{ feature_shape, reinterpret_cast<const T *>(feature_base), feature_size}; ::internal::arm_compute::feature::View<T> into{&tensor}; - ::nnfw::util::feature::iterate(feature_shape) + ::nnfw::misc::feature::iterate(feature_shape) << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { const auto value = from.at(batch, ch, row, col); into.at(batch, ch, row, col) = value; @@ -241,29 +272,29 @@ static void initVectorTensor(::arm_compute::ITensor &tensor, const uint8_t *vec_ template <typename T> static void initTensor3D(::arm_compute::ITensor &tensor, - const nnfw::util::tensor::Shape &tensor_shape, const uint8_t *tensor_base, + const nnfw::misc::tensor::Shape &tensor_shape, const uint8_t *tensor_base, const size_t tensor_size) { const ::internal::nnapi::tensor::Reader<T> from{ 
tensor_shape, reinterpret_cast<const T *>(tensor_base), tensor_size}; ::internal::arm_compute::tensor::View<T> into{&tensor}; - ::nnfw::util::tensor::iterate(tensor_shape) << [&](const nnfw::util::tensor::Index &index_nnapi) { - ::nnfw::util::tensor::Index index_ACL = ::nnfw::util::tensor::copy_reverse(index_nnapi); + ::nnfw::misc::tensor::iterate(tensor_shape) << [&](const nnfw::misc::tensor::Index &index_nnapi) { + ::nnfw::misc::tensor::Index index_ACL = ::nnfw::misc::tensor::copy_reverse(index_nnapi); into.at(index_ACL) = from.at(index_nnapi); }; } template <typename T> static void initMatrixTensor(::arm_compute::ITensor &tensor, - const nnfw::util::matrix::Shape &matrix_shape, + const nnfw::misc::matrix::Shape &matrix_shape, const uint8_t *matrix_base, const size_t matrix_size) { const ::internal::nnapi::matrix::Reader<T> from{ matrix_shape, reinterpret_cast<const T *>(matrix_base), matrix_size}; ::internal::arm_compute::matrix::View<T> into{&tensor}; - ::nnfw::util::matrix::iterate(matrix_shape) << [&](uint32_t row, uint32_t col) { + ::nnfw::misc::matrix::iterate(matrix_shape) << [&](uint32_t row, uint32_t col) { const auto value = from.at(row, col); into.at(row, col) = value; }; @@ -288,34 +319,66 @@ static void initReorderVectorTensor(::arm_compute::ITensor &tensor, const uint8_ template <typename T> static void initKernelTensor(::arm_compute::ITensor &tensor, - const nnfw::util::kernel::Shape &kernel_shape, + const nnfw::misc::kernel::Shape &kernel_shape, const uint8_t *kernel_base, const size_t kernel_size) { const ::internal::nnapi::kernel::Reader<T> from{ kernel_shape, reinterpret_cast<const T *>(kernel_base), kernel_size}; ::internal::arm_compute::kernel::View<T> into{&tensor}; - ::nnfw::util::kernel::iterate(kernel_shape) + ::nnfw::misc::kernel::iterate(kernel_shape) << [&](uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) { const auto value = from.at(nth, ch, row, col); into.at(nth, ch, row, col) = value; }; } +/** + * @brief Structure to provide 
interface methods of compilation plan builder + */ struct IPlanBuilder { + /** + * @brief Destruct IPlanBuilder object using default destructor + */ virtual ~IPlanBuilder() = default; + /** + * @brief Add TensorInfo with Shape Constraints + * @param [in] ind Index of operand + * @param [in] info TensorInfo value to set to index of operand + * @return N/A + */ virtual void addShapeConstr(const ::internal::tflite::operand::Index &ind, const ::arm_compute::TensorInfo &info) = 0; + /** + * @brief Add Subsumption constraints + * @param [in] ind Index of operand + * @param [in] base Index of base operand of Subsumption + * @param [in] offset Offset of Subsumption + * @param [in] shape Shape of Subsumption + * @param [in] extend_parent extend_parent value of Subsumption + * @return N/A + */ virtual void addSubsumptionConstr(const ::internal::tflite::operand::Index &ind, const ::internal::tflite::operand::Index &base, const ::arm_compute::Coordinates &offset, const ::arm_compute::TensorShape &shape, bool extend_parent = false) = 0; + /** + * @brief Add Initializer lambda with ITensor param + * @param [in] ind Index of operand + * @param [in] initializer Initializer to add + * @return N/A + */ virtual void addInitializer(const ::internal::tflite::operand::Index &ind, const Initializer &initializer) = 0; - virtual void addStage(const Stage &) = 0; + /** + * @brief Add Stage lambda with IAllocationContext and IExecutionBuilder params + * @param [in] stage Stage to add + * @return N/A + */ + virtual void addStage(const Stage &stage) = 0; }; // @@ -333,7 +396,6 @@ private: void appendReLU(::arm_compute::ITensor *tensor); void appendReLU6(::arm_compute::ITensor *tensor); void appendReLU1(::arm_compute::ITensor *tensor); - void appendTanh(::arm_compute::ITensor *tensor); public: void append(FuseCode code, ::arm_compute::ITensor *tensor); @@ -349,7 +411,7 @@ void ActivationBuilder::appendReLU(::arm_compute::ITensor *ifm_alloc) if (::internal::arm_compute::isGpuMode()) { - auto fn 
= nnfw::make_unique<::arm_compute::CLActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); fn->configure(CAST_CL(ifm_alloc), nullptr, act_info); @@ -357,7 +419,7 @@ void ActivationBuilder::appendReLU(::arm_compute::ITensor *ifm_alloc) } else { - auto fn = nnfw::make_unique<::arm_compute::NEActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>(); fn->configure(ifm_alloc, nullptr, act_info); @@ -372,7 +434,7 @@ void ActivationBuilder::appendReLU1(::arm_compute::ITensor *ifm_alloc) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); fn->configure(CAST_CL(ifm_alloc), nullptr, act_info); @@ -380,7 +442,7 @@ void ActivationBuilder::appendReLU1(::arm_compute::ITensor *ifm_alloc) } else { - auto fn = nnfw::make_unique<::arm_compute::NEActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>(); fn->configure(ifm_alloc, nullptr, act_info); @@ -395,7 +457,7 @@ void ActivationBuilder::appendReLU6(::arm_compute::ITensor *ifm_alloc) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); fn->configure(CAST_CL(ifm_alloc), nullptr, act_info); @@ -403,7 +465,7 @@ void ActivationBuilder::appendReLU6(::arm_compute::ITensor *ifm_alloc) } else { - auto fn = nnfw::make_unique<::arm_compute::NEActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>(); fn->configure(ifm_alloc, nullptr, act_info); @@ -411,23 +473,6 @@ void ActivationBuilder::appendReLU6(::arm_compute::ITensor *ifm_alloc) } } -void ActivationBuilder::appendTanh(::arm_compute::ITensor *ifm_alloc) -{ - const ::arm_compute::ActivationLayerInfo act_info{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 
1.0f}; - - if (::internal::arm_compute::isGpuMode()) - { - auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>(); - - fn->configure(CAST_CL(ifm_alloc), nullptr, act_info); - - _builder.append("Tanh", std::move(fn)); - } - else - throw std::runtime_error("Not supported, yet"); -} - void ActivationBuilder::append(FuseCode code, ::arm_compute::ITensor *ifm_alloc) { switch (code) @@ -490,9 +535,11 @@ public: void visit(const ::internal::tflite::op::Softmax::Node &node) override; void visit(const ::internal::tflite::op::StridedSlice::Node &node) override; void visit(const ::internal::tflite::op::ReduceMax::Node &node) override; + void visit(const ::internal::tflite::op::ReduceMin::Node &node) override; void visit(const ::internal::tflite::op::Cast::Node &node) override; void visit(const ::internal::tflite::op::TopKV2::Node &node) override; void visit(const ::internal::tflite::op::Gather::Node &node) override; + void visit(const ::internal::tflite::op::PReLU::Node &node) override; void visit(const ::internal::tflite::op::ReLU::Node &node) override; void visit(const ::internal::tflite::op::ReLU1::Node &node) override; void visit(const ::internal::tflite::op::ReLU6::Node &node) override; @@ -504,15 +551,33 @@ public: void visit(const ::internal::tflite::op::LSTM::Node &node) override; void visit(const ::internal::tflite::op::Floor::Node &node) override; void visit(const ::internal::tflite::op::Split::Node &node) override; + void visit(const ::internal::tflite::op::ArgMax::Node &node) override; void visit(const ::internal::tflite::op::RSQRT::Node &node) override; + void visit(const ::internal::tflite::op::SQRT::Node &node) override; void visit(const ::internal::tflite::op::Pad::Node &node) override; void visit(const ::internal::tflite::op::SpaceToDepth::Node &node) override; + void visit(const ::internal::tflite::op::SpaceToBatchND::Node &node) override; + void visit(const ::internal::tflite::op::BatchToSpaceNd::Node &node) override; void visit(const 
::internal::tflite::op::L2Pool2D::Implicit::Node &node) override; void visit(const ::internal::tflite::op::L2Pool2D::Explicit::Node &node) override; void visit(const ::internal::tflite::op::EmbeddingLookup::Node &node) override; void visit(const ::internal::tflite::op::HashtableLookup::Node &node) override; void visit(const ::internal::tflite::op::L2Normalization::Node &node) override; void visit(const ::internal::tflite::op::SquaredDifference::Node &node) override; + void visit(const ::internal::tflite::op::LocalResponseNormalization::Node &node) override; + void visit(const ::internal::tflite::op::DepthToSpace::Node &node) override; + void visit(const ::internal::tflite::op::Unpack::Node &node) override; + void visit(const ::internal::tflite::op::Neg::Node &node) override; + void visit(const ::internal::tflite::op::Exp::Node &node) override; + void visit(const ::internal::tflite::op::ReduceSum::Node &node) override; + void visit(const ::internal::tflite::op::Equal::Node &node) override; + void visit(const ::internal::tflite::op::TransposeConv::Node &node) override; + void visit(const ::internal::tflite::op::Pack::Node &node) override; + void visit(const ::internal::tflite::op::Abs::Node &node) override; + void visit(const ::internal::tflite::op::NotEqual::Node &node) override; + void visit(const ::internal::tflite::op::LogicalAnd::Node &node) override; + void visit(const ::internal::tflite::op::LogicalNot::Node &node) override; + void visit(const ::internal::tflite::op::LogicalOr::Node &node) override; private: const ::internal::tflite::operand::Set &_ctx; @@ -582,7 +647,7 @@ void Planner::visit(const ::internal::tflite::op::Add::Node &node) // NOTE SimpleArithmeticAddition does not support broadcasting assert(lhs_shape == rhs_shape); - auto l = nnfw::make_unique<SimpleArithmeticAddition>(); + auto l = nnfw::cpp14::make_unique<SimpleArithmeticAddition>(); l->configure(lhs_alloc, rhs_alloc, ofm_alloc); @@ -592,7 +657,7 @@ void Planner::visit(const 
::internal::tflite::op::Add::Node &node) { if (::internal::arm_compute::isGpuMode()) { - auto l = nnfw::make_unique<::arm_compute::CLArithmeticAddition>(); + auto l = nnfw::cpp14::make_unique<::arm_compute::CLArithmeticAddition>(); // TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification l->configure(CAST_CL(lhs_alloc), CAST_CL(rhs_alloc), CAST_CL(ofm_alloc), @@ -602,7 +667,7 @@ void Planner::visit(const ::internal::tflite::op::Add::Node &node) } else // NEON { - auto l = nnfw::make_unique<::arm_compute::NEArithmeticAddition>(); + auto l = nnfw::cpp14::make_unique<::arm_compute::NEArithmeticAddition>(); // TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification l->configure(lhs_alloc, rhs_alloc, ofm_alloc, ::arm_compute::ConvertPolicy::SATURATE); @@ -672,7 +737,7 @@ void Planner::visit(const ::internal::tflite::op::Sub::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLArithmeticSubtraction>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLArithmeticSubtractionEx>(); // TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification fn->configure(CAST_CL(lhs_alloc), CAST_CL(rhs_alloc), CAST_CL(ofm_alloc), @@ -682,7 +747,7 @@ void Planner::visit(const ::internal::tflite::op::Sub::Node &node) } else // NEON { - auto fn = nnfw::make_unique<::arm_compute::NEArithmeticSubtraction>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArithmeticSubtraction>(); // TODO Decide ConvertPolicy (WARP? SATURATE?) 
according to NN API specification fn->configure(lhs_alloc, rhs_alloc, ofm_alloc, ::arm_compute::ConvertPolicy::SATURATE); @@ -696,7 +761,6 @@ void Planner::visit(const ::internal::tflite::op::Sub::Node &node) _builder.addStage(stage); } -// TODO: test with scalar*scalar, tensor bigger than 3D (e.g., 4D) void Planner::visit(const ::internal::tflite::op::Mul::Node &node) { const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; @@ -754,7 +818,7 @@ void Planner::visit(const ::internal::tflite::op::Mul::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLPixelWiseMultiplication>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPixelWiseMultiplication>(); fn->configure(CAST_CL(lhs_input_alloc), CAST_CL(rhs_input_alloc), CAST_CL(output_alloc), 1.0, // scale @@ -765,9 +829,9 @@ void Planner::visit(const ::internal::tflite::op::Mul::Node &node) } else // NEON { - auto fn = nnfw::make_unique<::arm_compute::NEPixelWiseMultiplication>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEPixelWiseMultiplication>(); - fn->configure(CAST_NE(lhs_input_alloc), CAST_NE(rhs_input_alloc), CAST_NE(output_alloc), + fn->configure(lhs_input_alloc, rhs_input_alloc, output_alloc, 1.0, // scale arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO); @@ -836,11 +900,11 @@ void Planner::visit(const ::internal::tflite::op::Div::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLPixelWiseDivision>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPixelWiseDivision>(); - // TODO Decide scale, overflow_policy, and rounding_policy. - // Currently, the default values are used. 
- fn->configure(CAST_CL(lhs_alloc), CAST_CL(rhs_alloc), CAST_CL(ofm_alloc)); + fn->configure(CAST_CL(lhs_alloc), CAST_CL(rhs_alloc), CAST_CL(ofm_alloc), + 1.0, // scale + arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO); builder.append("Div", std::move(fn)); } @@ -975,7 +1039,7 @@ void Planner::visit(const ::internal::tflite::op::Conv2D::Implicit::Node &node) param.stride = stride; param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME) - ? same_padding(ifm_shape, ofm_shape, stride, ker_shape.W, ker_shape.H) + ? same_padding(ifm_shape, stride, ker_shape.W, ker_shape.H) : valid_padding(); param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>()); @@ -1255,7 +1319,7 @@ void Planner::visit(const ::internal::tflite::op::DepthwiseConv2D::Implicit::Nod param.stride = stride; param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME) - ? same_padding(ifm_shape, ofm_shape, stride, ker_shape.W, ker_shape.H) + ? same_padding(ifm_shape, stride, ker_shape.W, ker_shape.H) : valid_padding(); param.multipler = multiplier; @@ -1293,7 +1357,7 @@ void Planner::visit(const ::internal::tflite::op::DepthwiseConv2D::Implicit::Nod if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>(); fn->configure(CAST_CL(ifm_alloc), CAST_CL(ker_alloc), CAST_CL(bias_alloc), CAST_CL(ofm_alloc), conv_info, param.multipler); @@ -1302,7 +1366,7 @@ void Planner::visit(const ::internal::tflite::op::DepthwiseConv2D::Implicit::Nod } else { - auto fn = nnfw::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>(); fn->configure(ifm_alloc, ker_alloc, bias_alloc, ofm_alloc, conv_info, param.multipler); @@ -1436,7 +1500,7 @@ void Planner::visit(const ::internal::tflite::op::DepthwiseConv2D::Explicit::Nod if 
(::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>(); fn->configure(CAST_CL(ifm_alloc), CAST_CL(ker_alloc), CAST_CL(bias_alloc), CAST_CL(ofm_alloc), conv_info, param.multipler); @@ -1445,7 +1509,7 @@ void Planner::visit(const ::internal::tflite::op::DepthwiseConv2D::Explicit::Nod } else { - auto fn = nnfw::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>(); fn->configure(ifm_alloc, ker_alloc, bias_alloc, ofm_alloc, conv_info, param.multipler); @@ -1499,7 +1563,7 @@ void Planner::visit(const ::internal::tflite::op::Dequantize::Node &node) if (from_env<bool>(std::getenv("USE_SIMPLE_CAST"))) { // Use the CPU version of CAST operation - auto l = nnfw::make_unique<SimpleCastLayer>(); + auto l = nnfw::cpp14::make_unique<SimpleCastLayer>(); l->configure(input_alloc, output_alloc); fn = std::move(l); @@ -1508,7 +1572,7 @@ void Planner::visit(const ::internal::tflite::op::Dequantize::Node &node) { if (::internal::arm_compute::isGpuMode()) { - auto l = nnfw::make_unique<::arm_compute::CLCast>(); + auto l = nnfw::cpp14::make_unique<::arm_compute::CLCast>(); l->configure(CAST_CL(input_alloc), CAST_CL(output_alloc)); fn = std::move(l); @@ -1554,10 +1618,12 @@ void Planner::visit(const ::internal::tflite::op::MaxPool2D::Implicit::Node &nod // TODO Should move to the place where the operand is handled, if it is possible. 
// Set Shape Constraints and TensorInfo - _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), - _ctx.at(ofm_index).type())); - _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), - _ctx.at(ifm_index).type())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); // Construct operation parameters struct Param @@ -1586,7 +1652,7 @@ void Planner::visit(const ::internal::tflite::op::MaxPool2D::Implicit::Node &nod param.stride.horizontal = hstride; param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME) - ? same_padding(ifm_shape, ofm_shape, param.stride, kw, kh) + ? same_padding(ifm_shape, param.stride, kw, kh) : valid_padding(); param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>()); @@ -1652,7 +1718,6 @@ void Planner::visit(const ::internal::tflite::op::MaxPool2D::Explicit::Node &nod const ::internal::tflite::operand::Index activation_index{node.param().activation_index}; - // TODO 4D tensor (dim(0) !=1 ) const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(); const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(); @@ -1669,10 +1734,12 @@ void Planner::visit(const ::internal::tflite::op::MaxPool2D::Explicit::Node &nod // TODO Should move to the place where the operand is handled, if it is possible. 
// Set Shape Constraints and TensorInfo - _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), - _ctx.at(ofm_index).type())); - _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), - _ctx.at(ifm_index).type())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); // Construct operation parameters struct Param @@ -1782,10 +1849,12 @@ void Planner::visit(const ::internal::tflite::op::AvgPool2D::Implicit::Node &nod // TODO Should move to the place where the operand is handled, if it is possible. // Set Shape Constraints and TensorInfo - _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), - _ctx.at(ofm_index).type())); - _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), - _ctx.at(ifm_index).type())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); // Construct operation parameters struct Param @@ -1814,7 +1883,7 @@ void Planner::visit(const ::internal::tflite::op::AvgPool2D::Implicit::Node &nod param.stride.horizontal = hstride; param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME) - ? same_padding(ifm_shape, ofm_shape, param.stride, kw, kh) + ? 
same_padding(ifm_shape, param.stride, kw, kh) : valid_padding(); param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>()); @@ -1882,7 +1951,6 @@ void Planner::visit(const ::internal::tflite::op::AvgPool2D::Explicit::Node &nod const ::internal::tflite::operand::Index activation_index{node.param().activation_index}; - // TODO 4D tensor (dim(0) != 1) const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(); const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(); @@ -1899,10 +1967,12 @@ void Planner::visit(const ::internal::tflite::op::AvgPool2D::Explicit::Node &nod // TODO Should move to the place where the operand is handled, if it is possible. // Set Shape Constraints and TensorInfo - _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), - _ctx.at(ofm_index).type())); - _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), - _ctx.at(ifm_index).type())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); // Construct operation parameters struct Param @@ -1998,8 +2068,9 @@ void Planner::visit(const ::internal::tflite::op::Concat::Node &node) } // Set Shape Constraints and TensorInfo (for output) - _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), - _ctx.at(ofm_index).type())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); // Set Shape Constraints and TensorInfo (for input) const uint32_t coord_index = ToARMComputeAxis(input_rank, axis).value(); @@ -2060,7 
+2131,7 @@ void Planner::visit(const ::internal::tflite::op::FullyConnected::Node &node) internal::tflite::operand::Shape reshape(2); if (input_rank == 4) { - nnfw::util::feature::Shape ifm_shape_feature = _ctx.at(input_index).shape().asFeature(); + nnfw::misc::feature::Shape ifm_shape_feature = _ctx.at(input_index).shape().asFeature(); auto feature_size = ifm_shape_feature.N * ifm_shape_feature.C * ifm_shape_feature.H * ifm_shape_feature.W; assert(feature_size == batch_size * input_size); @@ -2078,7 +2149,7 @@ void Planner::visit(const ::internal::tflite::op::FullyConnected::Node &node) else if (input_rank == 2) { auto ifm_shape = _ctx.at(input_index).shape(); - nnfw::util::matrix::Shape ifm_shape_matrix = ifm_shape.asMatrix(); + nnfw::misc::matrix::Shape ifm_shape_matrix = ifm_shape.asMatrix(); assert(ifm_shape.dim(0) == batch_size); assert(ifm_shape.dim(1) == input_size); @@ -2131,7 +2202,7 @@ void Planner::visit(const ::internal::tflite::op::FullyConnected::Node &node) auto weight_alloc = ctx.at(::internal::tflite::operand::Index{param.weight_index}); auto bias_alloc = ctx.at(::internal::tflite::operand::Index{param.bias_index}); - auto fn = nnfw::make_unique<GenericFullyConnectedLayer>(); + auto fn = nnfw::cpp14::make_unique<GenericFullyConnectedLayer>(); fn->configure(input_alloc, weight_alloc, bias_alloc, output_alloc, needs_reshape, asTensorShape(reshape)); @@ -2154,10 +2225,12 @@ void Planner::visit(const ::internal::tflite::op::ResizeBilinear::Node &node) // TODO Should move to the place where the operand is handled, if it is possible. 
// Set Shape Constraints - _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), - _ctx.at(ofm_index).type())); - _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), - _ctx.at(ifm_index).type())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); struct Param { @@ -2181,7 +2254,7 @@ void Planner::visit(const ::internal::tflite::op::ResizeBilinear::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLScale>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLScale>(); fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), ::arm_compute::InterpolationPolicy::BILINEAR, @@ -2202,18 +2275,19 @@ void Planner::visit(const ::internal::tflite::op::Reshape::Node &node) const ::internal::tflite::operand::Index output_index{node.param().output_index}; const ::internal::tflite::operand::Index input_index{node.param().input_index}; - // NOTE The content of a tensor specified by shape_index should be aligned with - // output tensor shape - // TODO Check consistency of ouput shape + auto input_shape = asTensorShape(_ctx.at(input_index).shape()); + auto output_shape = asTensorShape(_ctx.at(output_index).shape()); - // TODO Re-enable this assert - // assert((ifm_shape.C * ifm_shape.H * ifm_shape.W) == out_size); + assert(input_shape[0] * input_shape[1] * input_shape[2] * input_shape[3] == + output_shape[0] * output_shape[1] * output_shape[2] * output_shape[3]); // TODO Should move to the place where the operand is handled, if it is possible. 
- _builder.addShapeConstr(output_index, asTensorInfo(asTensorShape(_ctx.at(output_index).shape()), - _ctx.at(output_index).type())); - _builder.addShapeConstr(input_index, asTensorInfo(asTensorShape(_ctx.at(input_index).shape()), - _ctx.at(input_index).type())); + _builder.addShapeConstr(output_index, asTensorInfo(output_shape, _ctx.at(output_index).type(), + _ctx.at(output_index).scale(), + _ctx.at(output_index).zeroPoint())); + _builder.addShapeConstr(input_index, asTensorInfo(input_shape, _ctx.at(input_index).type(), + _ctx.at(input_index).scale(), + _ctx.at(input_index).zeroPoint())); struct Param { @@ -2233,7 +2307,7 @@ void Planner::visit(const ::internal::tflite::op::Reshape::Node &node) if (::internal::arm_compute::isGpuMode()) { // GenericReshape first apply NCHW->NHWC permutation, and apply reshape - auto fn = nnfw::make_unique<GenericReshapeLayer>(); + auto fn = nnfw::cpp14::make_unique<GenericReshapeLayer>(); fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc)); @@ -2241,7 +2315,7 @@ void Planner::visit(const ::internal::tflite::op::Reshape::Node &node) } else { - auto fn = nnfw::make_unique<GenericReshapeLayer>(); + auto fn = nnfw::cpp14::make_unique<GenericReshapeLayer>(); fn->configure(input_alloc, output_alloc); @@ -2259,19 +2333,15 @@ void Planner::visit(const ::internal::tflite::op::Squeeze::Node &node) const ::internal::tflite::operand::Index output_index{node.param().output_index}; const ::internal::tflite::operand::Index input_index{node.param().input_index}; - // Currently, 3D-input with dims is tested. Note that param(). dims_index_optional is optional. 
- // two generated test passed: - // - 3D input : squeeze_float_1 - // - 2D input : squeeze_3D_float_1 - // - 4D input fails (squeeze.mod.py) -> we need general tensor support - - // TODO Support generic tensor shape - // Set Shape Constraints - _builder.addShapeConstr(output_index, asTensorInfo(asTensorShape(_ctx.at(output_index).shape()), - _ctx.at(output_index).type())); - _builder.addShapeConstr(input_index, asTensorInfo(asTensorShape(_ctx.at(input_index).shape()), - _ctx.at(input_index).type())); + _builder.addShapeConstr(output_index, + asTensorInfo(asTensorShape(_ctx.at(output_index).shape()), + _ctx.at(output_index).type(), _ctx.at(output_index).scale(), + _ctx.at(output_index).zeroPoint())); + _builder.addShapeConstr(input_index, + asTensorInfo(asTensorShape(_ctx.at(input_index).shape()), + _ctx.at(input_index).type(), _ctx.at(input_index).scale(), + _ctx.at(input_index).zeroPoint())); // Construct operation parameters struct Param @@ -2291,7 +2361,7 @@ void Planner::visit(const ::internal::tflite::op::Squeeze::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLReshapeLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReshapeLayer>(); fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc)); @@ -2299,7 +2369,7 @@ void Planner::visit(const ::internal::tflite::op::Squeeze::Node &node) } else { - auto fn = nnfw::make_unique<::arm_compute::NEReshapeLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEReshapeLayer>(); fn->configure(input_alloc, output_alloc); @@ -2350,7 +2420,7 @@ void Planner::visit(const ::internal::tflite::op::Softmax::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLSoftmaxLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSoftmaxLayer>(); fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), param.scale); @@ -2358,7 +2428,7 @@ void Planner::visit(const ::internal::tflite::op::Softmax::Node 
&node) } else { - auto fn = nnfw::make_unique<::arm_compute::NESoftmaxLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NESoftmaxLayer>(); fn->configure(input_alloc, output_alloc, param.scale); @@ -2397,14 +2467,18 @@ void Planner::visit(const ::internal::tflite::op::StridedSlice::Node &node) assert(_ctx.at(startData_index).shape().rank() == 1); assert(_ctx.at(endData_index).shape().rank() == 1); assert(_ctx.at(stridesData_index).shape().rank() == 1); - _builder.addShapeConstr(startData_index, - asTensorInfo(asTensorShape(_ctx.at(startData_index).shape()), - _ctx.at(startData_index).type())); + _builder.addShapeConstr( + startData_index, + asTensorInfo(asTensorShape(_ctx.at(startData_index).shape()), _ctx.at(startData_index).type(), + _ctx.at(startData_index).scale(), _ctx.at(startData_index).zeroPoint())); _builder.addShapeConstr(endData_index, asTensorInfo(asTensorShape(_ctx.at(endData_index).shape()), - _ctx.at(endData_index).type())); - _builder.addShapeConstr(stridesData_index, - asTensorInfo(asTensorShape(_ctx.at(endData_index).shape()), - _ctx.at(stridesData_index).type())); + _ctx.at(endData_index).type(), + _ctx.at(endData_index).scale(), + _ctx.at(endData_index).zeroPoint())); + _builder.addShapeConstr( + stridesData_index, + asTensorInfo(asTensorShape(_ctx.at(endData_index).shape()), _ctx.at(stridesData_index).type(), + _ctx.at(stridesData_index).scale(), _ctx.at(stridesData_index).zeroPoint())); // Set initializers for indices data such as order of inputData { @@ -2469,7 +2543,7 @@ void Planner::visit(const ::internal::tflite::op::StridedSlice::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLStridedSlice>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLStridedSliceEx>(); fn->configure(CAST_CL(inputData_alloc), CAST_CL(outputData_alloc), CAST_CL(startData_alloc), CAST_CL(endData_alloc), CAST_CL(stridesData_alloc), param.beginMask, @@ -2484,6 +2558,133 @@ void Planner::visit(const 
::internal::tflite::op::StridedSlice::Node &node) _builder.addStage(stage); } +void Planner::visit(const ::internal::tflite::op::ReduceMin::Node &node) +{ + VERBOSE(ReduceMin) << "Configure REDUCEMIN operation" << std::endl; + + const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; + const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; + const ::internal::tflite::operand::Index axis_index{node.param().axis_index}; + + auto ifm_shape = _ctx.at(ifm_index).shape(); + auto ofm_shape = _ctx.at(ofm_index).shape(); + auto axis_shape = _ctx.at(axis_index).shape(); + assert(ifm_shape.rank() <= 4); + assert(ofm_shape.rank() <= ifm_shape.rank()); + assert(_ctx.at(axis_index).hasData()); + assert(axis_shape.rank() == 0 || axis_shape.rank() == 1); + + // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only + // supports cases reducing height and width or reducing depth. + // TODO We have to support all cases of dimensions up to 4. + // For correct permuting, we have to set output's shape to be equal in dimension position of the + // input. But the positions of the same dimensions in the input and output may be set differently. + // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original + // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to + // extend it in 4 dimensions, it should be {1,1,3,5}. + // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of + // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the + // next operation is not desired. 
+ if (ifm_shape.rank() == 4 && ifm_shape.rank() != ofm_shape.rank()) + { + if (ofm_shape.rank() == 2) + { + // Reducing HW + assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(3) == ofm_shape.dim(1)); + } + else if (ofm_shape.rank() == 3) + { + // Reducing C or + // (Reducing H and C(ifm and ofm) == 1) or (Reducing W and C(ifm and ofm) == 1) + assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(1) == ofm_shape.dim(1) && + ifm_shape.dim(2) == ofm_shape.dim(2) || + (ifm_shape.dim(0) == ofm_shape.dim(0) && + (ifm_shape.dim(1) == ofm_shape.dim(1) || ifm_shape.dim(2) == ofm_shape.dim(1)) && + ifm_shape.dim(3) == 1 && ofm_shape.dim(2) == 1)); + } + } + + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); + + std::set<uint32_t> axis; + { + const auto ifm_rank = ifm_shape.rank(); + switch (axis_shape.rank()) + { + case 0: // scalar + { + int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>(); + if (axis_value < 0) + { + axis_value += ifm_rank; + } + axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value()); + break; + } + case 1: // vector + { + const auto axis_base = _ctx.at(axis_index).data().base(); + const auto axis_size = _ctx.at(axis_index).shape().asVector(); + + // If axis's data does not exist as constant values and can be gotten as input data, we have + // to find a way to infer output shape when sinking output. 
+ assert(axis_base != nullptr); + for (uint32_t n = 0; n < axis_size; ++n) + { + int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n); + if (axis_value < 0) + { + axis_value += ifm_rank; + } + axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value()); + } + break; + } + default: + throw std::runtime_error("Not supported"); + break; + } + } + + // Construct operation parameters + struct Param + { + int ofm_index; + int ifm_index; + std::set<uint32_t> axis; + }; + + Param param; + + param.ofm_index = ofm_index.asInt(); + param.ifm_index = ifm_index.asInt(); + param.axis = axis; + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); + auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index}); + + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>(); + + fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis, + ::arm_compute::ReduceOperation::MIN); + + builder.append("ReduceMin", std::move(fn)); + } + else + throw std::runtime_error("Not supported, yet"); + }; + + _builder.addStage(stage); +} + void Planner::visit(const ::internal::tflite::op::ReduceMax::Node &node) { VERBOSE(ReduceMax) << "Configure REDUCEMAX operation" << std::endl; @@ -2492,43 +2693,104 @@ void Planner::visit(const ::internal::tflite::op::ReduceMax::Node &node) const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; const ::internal::tflite::operand::Index axis_index{node.param().axis_index}; - // Handle special case only: - // Input: Matrix (rank 2) - // Output: Vector (rank 1) - // Axis: one element (scalar or rank 1 with 1 element), constant auto ifm_shape = _ctx.at(ifm_index).shape(); auto ofm_shape = _ctx.at(ofm_index).shape(); auto axis_shape = _ctx.at(axis_index).shape(); - assert(ofm_shape.rank() == 1); - assert(ifm_shape.rank() == 2); + 
assert(ifm_shape.rank() <= 4); + assert(ofm_shape.rank() <= ifm_shape.rank()); assert(_ctx.at(axis_index).hasData()); - assert(axis_shape.rank() == 0 || ((axis_shape.rank() == 1) && (axis_shape.dim(0) == 1))); + assert(axis_shape.rank() == 0 || axis_shape.rank() == 1); + + // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only + // supports cases reducing height and width or reducing depth. + // TODO We have to support all cases of dimensions up to 4. + // For correct permuting, we have to set output's shape to be equal in dimension position of the + // input. But the positions of the same dimensions in the input and output may be set differently. + // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original + // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to + // extend it in 4 dimensions, it should be {1,1,3,5}. + // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of + // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the + // next operation is not desired. 
+ if (ifm_shape.rank() == 4 && ifm_shape.rank() != ofm_shape.rank()) + { + if (ofm_shape.rank() == 2) + { + // Reducing HW + assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(3) == ofm_shape.dim(1)); + } + else if (ofm_shape.rank() == 3) + { + // Reducing C or + // (Reducing H and C(ifm and ofm) == 1) or (Reducing W and C(ifm and ofm) == 1) + assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(1) == ofm_shape.dim(1) && + ifm_shape.dim(2) == ofm_shape.dim(2) || + (ifm_shape.dim(0) == ofm_shape.dim(0) && + (ifm_shape.dim(1) == ofm_shape.dim(1) || ifm_shape.dim(2) == ofm_shape.dim(1)) && + ifm_shape.dim(3) == 1 && ofm_shape.dim(2) == 1)); + } + } - _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), - _ctx.at(ofm_index).type())); - _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), - _ctx.at(ifm_index).type())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); + + std::set<uint32_t> axis; + { + const auto ifm_rank = ifm_shape.rank(); + switch (axis_shape.rank()) + { + case 0: // scalar + { + int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>(); + if (axis_value < 0) + { + axis_value += ifm_rank; + } + axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value()); + break; + } + case 1: // vector + { + const auto axis_base = _ctx.at(axis_index).data().base(); + const auto axis_size = _ctx.at(axis_index).shape().asVector(); - // Note: Assume only one element in axis. 
It is checked by assertion above - // TODO: handle general case - // Axis is integer value (generally, int32) - int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>(); - assert(axis_value == 1); + // If axis's data does not exist as constant values and can be gotten as input data, we have + // to find a way to infer output shape when sinking output. + assert(axis_base != nullptr); + for (uint32_t n = 0; n < axis_size; ++n) + { + int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n); + if (axis_value < 0) + { + axis_value += ifm_rank; + } + axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value()); + } + break; + } + default: + throw std::runtime_error("Not supported"); + break; + } + } // Construct operation parameters struct Param { int ofm_index; int ifm_index; - - int32_t axis; + std::set<uint32_t> axis; }; Param param; param.ofm_index = ofm_index.asInt(); param.ifm_index = ifm_index.asInt(); - param.axis = axis_value; + param.axis = axis; auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); @@ -2536,9 +2798,10 @@ void Planner::visit(const ::internal::tflite::op::ReduceMax::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLReduceMax>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>(); - fn->configure(CAST_CL(ifm_alloc), param.axis, CAST_CL(ofm_alloc)); + fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis, + ::arm_compute::ReduceOperation::MAX); builder.append("ReduceMax", std::move(fn)); } @@ -2586,7 +2849,7 @@ void Planner::visit(const ::internal::tflite::op::Cast::Node &node) if (from_env<bool>(std::getenv("USE_SIMPLE_CAST"))) { // Use the CPU version of CAST operation - auto l = nnfw::make_unique<SimpleCastLayer>(); + auto l = nnfw::cpp14::make_unique<SimpleCastLayer>(); l->configure(input_alloc, output_alloc); fn = std::move(l); @@ 
-2595,7 +2858,7 @@ void Planner::visit(const ::internal::tflite::op::Cast::Node &node) { if (::internal::arm_compute::isGpuMode()) { - auto l = nnfw::make_unique<::arm_compute::CLCast>(); + auto l = nnfw::cpp14::make_unique<::arm_compute::CLCast>(); l->configure(CAST_CL(input_alloc), CAST_CL(output_alloc)); fn = std::move(l); @@ -2627,13 +2890,18 @@ void Planner::visit(const ::internal::tflite::op::TopKV2::Node &node) // Set shape constraints _builder.addShapeConstr(outputValues_index, asTensorInfo(asTensorShape(_ctx.at(outputValues_index).shape()), - _ctx.at(outputValues_index).type())); + _ctx.at(outputValues_index).type(), + _ctx.at(outputValues_index).scale(), + _ctx.at(outputValues_index).zeroPoint())); _builder.addShapeConstr(outputIndices_index, asTensorInfo(asTensorShape(_ctx.at(outputIndices_index).shape()), - _ctx.at(outputIndices_index).type())); - _builder.addShapeConstr(inputData_index, - asTensorInfo(asTensorShape(_ctx.at(inputData_index).shape()), - _ctx.at(inputData_index).type())); + _ctx.at(outputIndices_index).type(), + _ctx.at(outputIndices_index).scale(), + _ctx.at(outputIndices_index).zeroPoint())); + _builder.addShapeConstr( + inputData_index, + asTensorInfo(asTensorShape(_ctx.at(inputData_index).shape()), _ctx.at(inputData_index).type(), + _ctx.at(inputData_index).scale(), _ctx.at(inputData_index).zeroPoint())); // Construct operation parameters struct Param @@ -2659,7 +2927,7 @@ void Planner::visit(const ::internal::tflite::op::TopKV2::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLTopKV2>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLTopKV2>(); fn->configure(CAST_CL(input_alloc), param.k, CAST_CL(values_alloc), CAST_CL(indices_alloc)); @@ -2686,12 +2954,15 @@ void Planner::visit(const ::internal::tflite::op::Gather::Node &node) assert(_ctx.at(rhs_index).shape().rank() == 1); // Set Shape Constraints - _builder.addShapeConstr(ofm_index, 
asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), - _ctx.at(ofm_index).type())); - _builder.addShapeConstr(lhs_index, asTensorInfo(asTensorShape(_ctx.at(lhs_index).shape()), - _ctx.at(lhs_index).type())); - _builder.addShapeConstr(rhs_index, asTensorInfo(asTensorShape(_ctx.at(rhs_index).shape()), - _ctx.at(rhs_index).type())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + lhs_index, asTensorInfo(asTensorShape(_ctx.at(lhs_index).shape()), _ctx.at(lhs_index).type(), + _ctx.at(lhs_index).scale(), _ctx.at(lhs_index).zeroPoint())); + _builder.addShapeConstr( + rhs_index, asTensorInfo(asTensorShape(_ctx.at(rhs_index).shape()), _ctx.at(rhs_index).type(), + _ctx.at(rhs_index).scale(), _ctx.at(ofm_index).zeroPoint())); // Construct operation parameters struct Param @@ -2720,7 +2991,7 @@ void Planner::visit(const ::internal::tflite::op::Gather::Node &node) { std::unique_ptr<::arm_compute::IFunction> fn; - auto l = nnfw::make_unique<::arm_compute::CLGather>(); + auto l = nnfw::cpp14::make_unique<::arm_compute::CLGather>(); l->configure(CAST_CL(lhs_alloc), CAST_CL(rhs_alloc), CAST_CL(ofm_alloc)); fn = std::move(l); builder.append("Gather", std::move(fn)); @@ -2732,6 +3003,62 @@ void Planner::visit(const ::internal::tflite::op::Gather::Node &node) _builder.addStage(stage); } +void Planner::visit(const ::internal::tflite::op::PReLU::Node &node) +{ + VERBOSE(PReLU) << "Configure PReLU operation" << std::endl; + + const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; + const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; + const ::internal::tflite::operand::Index alpha_index{node.param().alpha_index}; + + // Set shape constraints + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + 
_ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); + + _builder.addShapeConstr(alpha_index, + asTensorInfo(asTensorShape(_ctx.at(alpha_index).shape()), + _ctx.at(alpha_index).type(), _ctx.at(alpha_index).scale(), + _ctx.at(alpha_index).zeroPoint())); + + struct Param + { + int ofm_index; + int ifm_index; + int alpha_index; + }; + + Param param; + + param.ofm_index = ofm_index.asInt(); + param.ifm_index = ifm_index.asInt(); + param.alpha_index = alpha_index.asInt(); + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); + auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index}); + auto alpha_alloc = ctx.at(::internal::tflite::operand::Index{param.alpha_index}); + + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPReLU>(); + fn->configure(CAST_CL(ifm_alloc), CAST_CL(alpha_alloc), CAST_CL(ofm_alloc)); + builder.append("PReLU", std::move(fn)); + } + else + { + // TODO Add NEON support + + throw std::runtime_error("Not supported, yet"); + } + }; + + _builder.addStage(stage); +} + void Planner::visit(const ::internal::tflite::op::ReLU::Node &node) { VERBOSE(ReLU) << "Configure ReLU operation" << std::endl; @@ -2767,7 +3094,7 @@ void Planner::visit(const ::internal::tflite::op::ReLU::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), act_info); @@ -2775,7 +3102,7 @@ void Planner::visit(const ::internal::tflite::op::ReLU::Node &node) } else { - auto fn = 
nnfw::make_unique<::arm_compute::NEActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>(); fn->configure(ifm_alloc, ofm_alloc, act_info); @@ -2821,7 +3148,7 @@ void Planner::visit(const ::internal::tflite::op::ReLU1::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), act_info); @@ -2829,7 +3156,7 @@ void Planner::visit(const ::internal::tflite::op::ReLU1::Node &node) } else { - auto fn = nnfw::make_unique<::arm_compute::NEActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>(); fn->configure(ifm_alloc, ofm_alloc, act_info); @@ -2875,7 +3202,7 @@ void Planner::visit(const ::internal::tflite::op::ReLU6::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), act_info); @@ -2883,7 +3210,7 @@ void Planner::visit(const ::internal::tflite::op::ReLU6::Node &node) } else { - auto fn = nnfw::make_unique<::arm_compute::NEActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>(); fn->configure(ifm_alloc, ofm_alloc, act_info); @@ -2902,10 +3229,12 @@ void Planner::visit(const ::internal::tflite::op::Tanh::Node &node) const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; // Set shape constraints - _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), - _ctx.at(ofm_index).type())); - _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), - _ctx.at(ifm_index).type())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + 
_ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); struct Param { @@ -2927,7 +3256,7 @@ void Planner::visit(const ::internal::tflite::op::Tanh::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), act_info); @@ -2935,7 +3264,7 @@ void Planner::visit(const ::internal::tflite::op::Tanh::Node &node) } else { - auto fn = nnfw::make_unique<::arm_compute::NEActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>(); fn->configure(ifm_alloc, ofm_alloc, act_info); @@ -2981,14 +3310,20 @@ void Planner::visit(const ::internal::tflite::op::Logistic::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLActivationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), act_info); builder.append("Logistic", std::move(fn)); } else - throw std::runtime_error("Not supported, yet"); + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>(); + + fn->configure(ifm_alloc, ofm_alloc, act_info); + + builder.append("Logistic", std::move(fn)); + } }; _builder.addStage(stage); @@ -3005,52 +3340,89 @@ void Planner::visit(const ::internal::tflite::op::Mean::Node &node) const ::internal::tflite::operand::Index keep_dims_index{node.param().keep_dims_index}; const int keep_dims = _ctx.at(keep_dims_index).asScalar<int>(); - // Set shape constraints - _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), - _ctx.at(ofm_index).type())); - _builder.addShapeConstr(ifm_index, 
asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), - _ctx.at(ifm_index).type())); - _builder.addShapeConstr(axis_index, asTensorInfo(asTensorShape(_ctx.at(axis_index).shape()), - _ctx.at(axis_index).type())); + const auto ifm_shape = _ctx.at(ifm_index).shape(); + const auto ofm_shape = _ctx.at(ofm_index).shape(); - // TODO keep_dims==0 - assert(keep_dims != 0); + // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only + // supports cases reducing height and width or reducing depth. + // TODO We have to support all cases of dimensions up to 4. + // For correct permuting, we have to set output's shape to be equal in dimension position of the + // input. But the positions of the same dimensions in the input and output may be set differently. + // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original + // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to + // extend it in 4 dimensions, it should be {1,1,3,5}. + // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of + // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the + // next operation is not desired. 
+ if (ifm_shape.rank() == 4 && ifm_shape.rank() != ofm_shape.rank()) + { + if (ofm_shape.rank() == 2) + { + // Reducing HW + assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(3) == ofm_shape.dim(1)); + } + else if (ofm_shape.rank() == 3) + { + // Reducing C or + // (Reducing H and C(ifm and ofm) == 1) or (Reducing W and C(ifm and ofm) == 1) + assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(1) == ofm_shape.dim(1) && + ifm_shape.dim(2) == ofm_shape.dim(2) || + (ifm_shape.dim(0) == ofm_shape.dim(0) && + (ifm_shape.dim(1) == ofm_shape.dim(1) || ifm_shape.dim(2) == ofm_shape.dim(1)) && + ifm_shape.dim(3) == 1 && ofm_shape.dim(2) == 1)); + } + } - // Set axis - // TODO Other axis (Axis for width and height are currently supported.) - // TODO Other ranks (Rank 4 is currently supported.) - assert(_ctx.at(ifm_index).shape().rank() == 4); + // Set shape constraints + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); + _builder.addShapeConstr(axis_index, + asTensorInfo(asTensorShape(_ctx.at(axis_index).shape()), + _ctx.at(axis_index).type(), _ctx.at(axis_index).scale(), + _ctx.at(axis_index).zeroPoint())); - std::vector<uint32_t> axis; + std::set<uint32_t> axis; { - const auto axis_base = _ctx.at(axis_index).data().base(); - const auto axis_type = _ctx.at(axis_index).type(); - const auto axis_size = _ctx.at(axis_index).shape().asVector(); - - // NHWC type -> WHCN type - if (_ctx.at(ofm_index).shape().rank() == 4) + const auto ifm_rank = ifm_shape.rank(); + const auto axis_shape = _ctx.at(axis_index).shape(); + switch (axis_shape.rank()) { - for (uint32_t n = 0; n < axis_size; ++n) + case 0: // scalar { - const ::arm_compute::Coordinates 
coordinate{n}; - const int32_t *from = reinterpret_cast<const int32_t *>(axis_base) + n; - if (*from == 1) + int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>(); + if (axis_value < 0) { - axis.push_back(1); // h + axis_value += ifm_rank; } - else if (*from == 2) - { - axis.push_back(0); // w - } - else if (*from < 0) - { - // Nothing to do - } - else + axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value()); + break; + } + case 1: // vector + { + const auto axis_base = _ctx.at(axis_index).data().base(); + const auto axis_size = _ctx.at(axis_index).shape().asVector(); + + // If axis's data does not exist as constant values and can be gotten as input data, we have + // to find a way to infer output shape when sinking output. + assert(axis_base != nullptr); + for (uint32_t n = 0; n < axis_size; ++n) { - throw std::runtime_error{"Not supported axis"}; + int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n); + if (axis_value < 0) + { + axis_value += ifm_rank; + } + axis.insert(ToARMComputeAxis(ifm_rank, axis_value).value()); } + break; } + default: + throw std::runtime_error("Not supported"); + break; } } @@ -3058,7 +3430,7 @@ void Planner::visit(const ::internal::tflite::op::Mean::Node &node) { int ofm_index; int ifm_index; - std::vector<uint32_t> axis; + std::set<uint32_t> axis; }; Param param; @@ -3073,9 +3445,10 @@ void Planner::visit(const ::internal::tflite::op::Mean::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLReductionMean>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>(); - fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis); + fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis, + ::arm_compute::ReduceOperation::MEAN); builder.append("Mean", std::move(fn)); } @@ -3125,23 +3498,37 @@ void Planner::visit(const ::internal::tflite::op::RNN::Node &node) num_units == _ctx.at(hidden_state_out_index).shape().dim(1)); // Set 
Shape Constraints and TensorInfo - _builder.addShapeConstr(output_index, asTensorInfo(asTensorShape(_ctx.at(output_index).shape()), - _ctx.at(output_index).type())); + _builder.addShapeConstr(output_index, + asTensorInfo(asTensorShape(_ctx.at(output_index).shape()), + _ctx.at(output_index).type(), _ctx.at(output_index).scale(), + _ctx.at(output_index).zeroPoint())); _builder.addShapeConstr(hidden_state_out_index, asTensorInfo(asTensorShape(_ctx.at(hidden_state_out_index).shape()), - _ctx.at(hidden_state_out_index).type())); - _builder.addShapeConstr(input_index, asTensorInfo(asTensorShape(_ctx.at(input_index).shape()), - _ctx.at(input_index).type())); + _ctx.at(hidden_state_out_index).type(), + _ctx.at(hidden_state_out_index).scale(), + _ctx.at(hidden_state_out_index).zeroPoint())); + _builder.addShapeConstr(input_index, + asTensorInfo(asTensorShape(_ctx.at(input_index).shape()), + _ctx.at(input_index).type(), _ctx.at(input_index).scale(), + _ctx.at(input_index).zeroPoint())); _builder.addShapeConstr(weights_index, asTensorInfo(asTensorShape(_ctx.at(weights_index).shape()), - _ctx.at(weights_index).type())); + _ctx.at(weights_index).type(), + _ctx.at(weights_index).scale(), + _ctx.at(weights_index).zeroPoint())); _builder.addShapeConstr(recurrent_weights_index, asTensorInfo(asTensorShape(_ctx.at(recurrent_weights_index).shape()), - _ctx.at(recurrent_weights_index).type())); - _builder.addShapeConstr(bias_index, asTensorInfo(asTensorShape(_ctx.at(bias_index).shape()), - _ctx.at(bias_index).type())); + _ctx.at(recurrent_weights_index).type(), + _ctx.at(recurrent_weights_index).scale(), + _ctx.at(recurrent_weights_index).zeroPoint())); + _builder.addShapeConstr(bias_index, + asTensorInfo(asTensorShape(_ctx.at(bias_index).shape()), + _ctx.at(bias_index).type(), _ctx.at(bias_index).scale(), + _ctx.at(bias_index).zeroPoint())); _builder.addShapeConstr(hidden_state_in_index, asTensorInfo(asTensorShape(_ctx.at(hidden_state_in_index).shape()), - 
_ctx.at(hidden_state_in_index).type())); + _ctx.at(hidden_state_in_index).type(), + _ctx.at(hidden_state_in_index).scale(), + _ctx.at(hidden_state_in_index).zeroPoint())); // Construct operation parameters struct Param @@ -3215,13 +3602,13 @@ void Planner::visit(const ::internal::tflite::op::LSTM::Node &node) void Planner::visit(const ::internal::tflite::op::Transpose::Node &node) { VERBOSE(Transpose) << "Configure Transpose operation" << std::endl; - // Transpose supports only height-wight dimention support. - // CLPermute can be used to implement generic transpose along any axis - // But CLPermute only implements [2,0,1], [1,2,0], [3,2,0,1] - // TODO Implement other permutation CLPermute function and provide generic transpose const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; + const ::internal::tflite::operand::Index permu_index{node.param().permu_index}; + + assert(_ctx.at(ifm_index).shape().rank() == _ctx.at(ofm_index).shape().rank()); + assert(_ctx.at(permu_index).hasData() == true); // Set shape constraints _builder.addShapeConstr( @@ -3230,30 +3617,41 @@ void Planner::visit(const ::internal::tflite::op::Transpose::Node &node) _builder.addShapeConstr( ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); - // NNAPI spec provides permutation vector for generic transpose - // TODO Make the permutation vector a part of Param + struct Param { int ofm_index; int ifm_index; + const int32_t *pv; + int rank; }; Param param; param.ofm_index = ofm_index.asInt(); param.ifm_index = ifm_index.asInt(); + param.pv = reinterpret_cast<const int32_t *>(_ctx.at(permu_index).data().base()); + param.rank = _ctx.at(ifm_index).shape().rank(); auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto ofm_alloc = 
ctx.at(::internal::tflite::operand::Index{param.ofm_index}); const auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index}); - // CLTranspose assumes only spatial transpose, will be replaced with CLPermute - // TODO Check the validity of permutation vector, then call CLPermute with permu vector - auto fn = nnfw::make_unique<::arm_compute::CLTranspose>(); + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPermuteEx>(); - fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc)); + fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), + getARMComputePermutationVector(param.rank, param.pv)); + + builder.append("Transpose", std::move(fn)); + } + else + { + throw std::runtime_error("Not supported, yet"); + } - builder.append("Transpose", std::move(fn)); }; _builder.addStage(stage); @@ -3267,10 +3665,12 @@ void Planner::visit(const ::internal::tflite::op::Floor::Node &node) const ::internal::tflite::operand::Index ifm_index{node.param().input_index}; // Set shape constraints - _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), - _ctx.at(ofm_index).type())); - _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), - _ctx.at(ifm_index).type())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); struct Param { @@ -3289,7 +3689,7 @@ void Planner::visit(const ::internal::tflite::op::Floor::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLFloor>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLFloor>(); fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc)); 
@@ -3297,7 +3697,7 @@ void Planner::visit(const ::internal::tflite::op::Floor::Node &node) } else { - auto fn = nnfw::make_unique<::arm_compute::NEFloor>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEFloor>(); fn->configure(ifm_alloc, ofm_alloc); @@ -3308,11 +3708,367 @@ void Planner::visit(const ::internal::tflite::op::Floor::Node &node) _builder.addStage(stage); } +void Planner::visit(const ::internal::tflite::op::ArgMax::Node &node) +{ + VERBOSE(ArgMax) << "Configure ARGMAX operation" << std::endl; + + const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; + const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; + const ::internal::tflite::operand::Index axis_index{node.param().axis_index}; + + auto ifm_shape = _ctx.at(ifm_index).shape(); + auto ofm_shape = _ctx.at(ofm_index).shape(); + auto axis_shape = _ctx.at(axis_index).shape(); + + assert(_ctx.at(axis_index).hasData()); + // Axis dimension is always 1. + assert(axis_shape.rank() == 1); + assert(ifm_shape.rank() == ofm_shape.rank()); + + _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape(), false), + _ctx.at(ofm_index).type())); + _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape(), false), + _ctx.at(ifm_index).type())); + + std::vector<uint32_t> l_axis; + const auto axis_size = _ctx.at(axis_index).shape().asVector(); + auto axis_base = _ctx.at(axis_index).data().base(); + auto axis_type = _ctx.at(axis_index).type(); + // TODO Should support axis size > 1. + assert(axis_size == 1); + // axis is tensor with 1 dimension - always a vector. 
+ assert(axis_base != nullptr); + for (uint32_t n = 0; n < axis_size; ++n) + { + int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n); + if (axis_value < 0) + { + axis_value += ifm_shape.rank(); + } + l_axis.push_back(ToARMComputeAxis(ifm_shape.rank(), axis_value).value()); + } + + // Construct operation parameters + struct Param + { + int ofm_index; + int ifm_index; + std::vector<uint32_t> axis; + }; + + Param param; + + param.ofm_index = ofm_index.asInt(); + param.ifm_index = ifm_index.asInt(); + param.axis = l_axis; + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); + auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index}); + + if (from_env<bool>(std::getenv("USE_SIMPLE_ARGMINMAX"))) + { + // USE CPU VERSION OF ARGMAX + auto fn = nnfw::cpp14::make_unique<SimpleArgMinMax>(); + + fn->configure(ifm_alloc, ofm_alloc, param.axis, ::arm_compute::ArgOperation::MAX); + + builder.append("ArgMax", std::move(fn)); + } + else + { + + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLArgMinMax>(); + + fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis, + ::arm_compute::ArgOperation::MAX); + + builder.append("ArgMax", std::move(fn)); + } + else + throw std::runtime_error("Not supported, yet"); + } + }; + + _builder.addStage(stage); +} + +void Planner::visit(const ::internal::tflite::op::SQRT::Node &node) +{ + VERBOSE(SQRT) << "Configure SQRT operation" << std::endl; + + const ::internal::tflite::operand::Index output_index{node.param().output_index}; + const ::internal::tflite::operand::Index input_index{node.param().input_index}; + + // Set shape constraints + _builder.addShapeConstr(output_index, + asTensorInfo(asTensorShape(_ctx.at(output_index).shape()), + _ctx.at(output_index).type(), _ctx.at(output_index).scale(), + _ctx.at(output_index).zeroPoint())); + 
_builder.addShapeConstr(input_index, + asTensorInfo(asTensorShape(_ctx.at(input_index).shape()), + _ctx.at(input_index).type(), _ctx.at(input_index).scale(), + _ctx.at(input_index).zeroPoint())); + + struct Param + { + int output_index; + int input_index; + }; + + Param param; + + param.output_index = output_index.asInt(); + param.input_index = input_index.asInt(); + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index}); + auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index}); + + const ::arm_compute::ActivationLayerInfo act_info{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT}; + + if (from_env<bool>(std::getenv("USE_SIMPLE_SQRT"))) + { + // USE CPU VERSION OF SQRT + auto fn = nnfw::cpp14::make_unique<SimpleSQRT>(); + + fn->configure(input_alloc, output_alloc); + + builder.append("SQRT", std::move(fn)); + } + else + { + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayer>(); + + fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), act_info); + + builder.append("SQRT", std::move(fn)); + } + else + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::NEActivationLayer>(); + + fn->configure(input_alloc, output_alloc, act_info); + + builder.append("SQRT", std::move(fn)); + } + } + }; + + _builder.addStage(stage); +} + void Planner::visit(const ::internal::tflite::op::RSQRT::Node &node) { VERBOSE(RSQRT) << "Configure Rsqrt operation" << std::endl; - throw std::runtime_error("Not supported, yet"); + const ::internal::tflite::operand::Index output_index{node.param().output_index}; + const ::internal::tflite::operand::Index input_index{node.param().input_index}; + + // Set shape constraints + _builder.addShapeConstr(output_index, + asTensorInfo(asTensorShape(_ctx.at(output_index).shape()), + _ctx.at(output_index).type(), 
_ctx.at(output_index).scale(), + _ctx.at(output_index).zeroPoint())); + _builder.addShapeConstr(input_index, + asTensorInfo(asTensorShape(_ctx.at(input_index).shape()), + _ctx.at(input_index).type(), _ctx.at(input_index).scale(), + _ctx.at(input_index).zeroPoint())); + + struct Param + { + int output_index; + int input_index; + }; + + Param param; + + param.output_index = output_index.asInt(); + param.input_index = input_index.asInt(); + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index}); + auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index}); + + const ::arm_compute::ActivationLayerInfoEx act_info{ + ::arm_compute::ActivationLayerInfoEx::ActivationFunction::RSQRT}; + + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLActivationLayerEx>(); + + fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), act_info); + + builder.append("RSQRT", std::move(fn)); + } + else + throw std::runtime_error("Not supported, yet"); + }; + + _builder.addStage(stage); +} + +void Planner::visit(const ::internal::tflite::op::Equal::Node &node) +{ + const ::internal::tflite::operand::Index output_index{node.param().output_index}; + const ::internal::tflite::operand::Index input1_index{node.param().input1_index}; + const ::internal::tflite::operand::Index input2_index{node.param().input2_index}; + + // Set Shape Constraints and TensorInfo + _builder.addShapeConstr(output_index, + asTensorInfo(asTensorShape(_ctx.at(output_index).shape()), + _ctx.at(output_index).type(), _ctx.at(output_index).scale(), + _ctx.at(output_index).zeroPoint())); + + if (!(_ctx.at(input1_index).shape() == _ctx.at(input2_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(input1_index).shape().rank(), _ctx.at(input2_index).shape().rank()); + const_cast<::internal::tflite::operand::Shape 
&>(_ctx.at(input1_index).shape()) + .extendRank(broadcast_rank); + const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input2_index).shape()) + .extendRank(broadcast_rank); + } + _builder.addShapeConstr(input1_index, + asTensorInfo(asTensorShape(_ctx.at(input1_index).shape()), + _ctx.at(input1_index).type(), _ctx.at(input1_index).scale(), + _ctx.at(input1_index).zeroPoint())); + _builder.addShapeConstr(input2_index, + asTensorInfo(asTensorShape(_ctx.at(input2_index).shape()), + _ctx.at(input2_index).type(), _ctx.at(input2_index).scale(), + _ctx.at(input2_index).zeroPoint())); + + // Construct operation parameters + struct Param + { + int output_index; + int input1_index; + int input2_index; + }; + + Param param; + + param.output_index = output_index.asInt(); + param.input1_index = input1_index.asInt(); + param.input2_index = input2_index.asInt(); + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index}); + auto input1_alloc = ctx.at(::internal::tflite::operand::Index{param.input1_index}); + auto input2_alloc = ctx.at(::internal::tflite::operand::Index{param.input2_index}); + + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLComparisonOp>(); + + fn->configure(CAST_CL(input1_alloc), CAST_CL(input2_alloc), CAST_CL(output_alloc), + ::arm_compute::ComparisonOperation::EQUAL); + + builder.append("Equal", std::move(fn)); + } + else + { + // TODO Add NEON support + + throw std::runtime_error("Not supported, yet"); + } + }; + _builder.addStage(stage); +} + +void Planner::visit(const ::internal::tflite::op::TransposeConv::Node &node) +{ + VERBOSE(TransposeConv) << "Configure TransposeConv operation" << std::endl; + + const ::internal::tflite::operand::Index op_shape_index{node.param().op_shape_index}; + const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; + const 
::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; + const ::internal::tflite::operand::Index ker_index{node.param().ker_index}; + + const ::internal::tflite::operand::Index padding_index{node.param().padding_index}; + const ::internal::tflite::operand::Index hstride_index{node.param().hstride_index}; + const ::internal::tflite::operand::Index vstride_index{node.param().vstride_index}; + + // Only 4D tensors are supported + assert(_ctx.at(ofm_index).shape().rank() == 4); + assert(_ctx.at(ofm_index).shape().rank() == _ctx.at(ifm_index).shape().rank()); + assert(_ctx.at(ofm_index).shape().rank() == _ctx.at(ker_index).shape().rank()); + + const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(); + const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(); + const auto ker_shape = _ctx.at(ker_index).shape().asFeature(); + + assert(_ctx.at(padding_index).hasData() == true); + + const int32_t vstride = _ctx.at(vstride_index).asScalar<int32_t>(); + const int32_t hstride = _ctx.at(hstride_index).asScalar<int32_t>(); + + const PaddingCode padding_type = + static_cast<PaddingCode>(_ctx.at(padding_index).asScalar<int32_t>()); + + assert(vstride > 0); + assert(hstride > 0); + assert((ANEURALNETWORKS_PADDING_SAME == padding_type) || + (ANEURALNETWORKS_PADDING_VALID == padding_type)); + assert(ifm_shape.N == ofm_shape.N); + assert(ifm_shape.C == ker_shape.C); + assert(ker_shape.N == ofm_shape.C); + + // Set Shape Constraints and TensorInfo + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ker_index, asTensorInfo(asTensorShape(_ctx.at(ker_index).shape()), _ctx.at(ker_index).type(), + 
_ctx.at(ker_index).scale(), _ctx.at(ker_index).zeroPoint())); + + // Construct operation parameters + struct Param + { + int ofm_index; + int ifm_index; + int ker_index; + Padding padding; + Stride stride; + }; + + Param param; + + param.ofm_index = ofm_index.asInt(); + param.ifm_index = ifm_index.asInt(); + param.ker_index = ker_index.asInt(); + + param.stride.horizontal = hstride; + param.stride.vertical = vstride; + + param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME) + ? same_padding(ifm_shape, param.stride, ker_shape.W, ker_shape.H) + : valid_padding(); + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); + auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index}); + auto ker_alloc = ctx.at(::internal::tflite::operand::Index{param.ker_index}); + + auto fn = nnfw::cpp14::make_unique<SimpleTransposeConv>(); + + // Only rank 4 is supported + const int rank = 4; + + auto tconv_info = asPadStringInfo(param.padding, param.stride); + + fn->configure(ifm_alloc, ker_alloc, ofm_alloc, tconv_info, getARMComputeAxises(rank)); + + builder.append("TransposeConv", std::move(fn)); + }; + _builder.addStage(stage); } void Planner::visit(const ::internal::tflite::op::SquaredDifference::Node &node) @@ -3320,7 +4076,6 @@ void Planner::visit(const ::internal::tflite::op::SquaredDifference::Node &node) const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; const ::internal::tflite::operand::Index lhs_index{node.param().lhs_index}; const ::internal::tflite::operand::Index rhs_index{node.param().rhs_index}; - const ::internal::tflite::operand::Index activation_index{node.param().activation_index}; // Set Shape Constraints and TensorInfo _builder.addShapeConstr( @@ -3349,8 +4104,6 @@ void Planner::visit(const ::internal::tflite::op::SquaredDifference::Node &node) int ofm_index; int lhs_index; int rhs_index; - - FuseCode 
activation; }; Param param; @@ -3359,8 +4112,6 @@ void Planner::visit(const ::internal::tflite::op::SquaredDifference::Node &node) param.lhs_index = lhs_index.asInt(); param.rhs_index = rhs_index.asInt(); - param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>()); - auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); auto lhs_alloc = ctx.at(::internal::tflite::operand::Index{param.lhs_index}); @@ -3368,26 +4119,17 @@ void Planner::visit(const ::internal::tflite::op::SquaredDifference::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<SquaredDifferenceOperation>(); - - // TODO Decide ConvertPolicy (WARP? SATURATE?) according to NN API specification - fn->configure(lhs_alloc, rhs_alloc, ofm_alloc, ::arm_compute::ConvertPolicy::SATURATE, 1.0, - ::arm_compute::RoundingPolicy::TO_NEAREST_EVEN); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSquaredDifference>(); + fn->configure(CAST_CL(lhs_alloc), CAST_CL(rhs_alloc), CAST_CL(ofm_alloc)); builder.append("SquaredDifference", std::move(fn)); } - else // NEON + else { - auto fn = nnfw::make_unique<SquaredDifferenceOperation>(); - - // TODO Decide ConvertPolicy (WARP? SATURATE?) 
according to NN API specification - fn->configure(lhs_alloc, rhs_alloc, ofm_alloc, ::arm_compute::ConvertPolicy::SATURATE, 1.0, - ::arm_compute::RoundingPolicy::TO_ZERO); - - builder.append("SquaredDifference", std::move(fn)); + // TODO Enable NEON Support + throw std::runtime_error("Not supported, yet"); } - ActivationBuilder{builder}.append(param.activation, ofm_alloc); }; _builder.addStage(stage); @@ -3446,55 +4188,87 @@ void Planner::visit(const ::internal::tflite::op::Pad::Node &node) const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; const ::internal::tflite::operand::Index paddings_index{node.param().paddings_index}; - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(); - const auto paddings_shape = _ctx.at(paddings_index).shape().asTensor(); + assert(_ctx.at(paddings_index).hasData() == true); // Set Shape Constraints and TensorInfo + _builder.addShapeConstr(ifm_index, + asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape(), false), + _ctx.at(ifm_index).type(), _ctx.at(ifm_index).scale(), + _ctx.at(ifm_index).zeroPoint())); + _builder.addShapeConstr(ofm_index, + asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape(), false), + _ctx.at(ofm_index).type(), _ctx.at(ofm_index).scale(), + _ctx.at(ofm_index).zeroPoint())); _builder.addShapeConstr( - ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), - _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); - _builder.addShapeConstr( - ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), - _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); - _builder.addShapeConstr( - paddings_index, - asTensorInfo(asTensorShape(_ctx.at(paddings_index).shape()), _ctx.at(paddings_index).type(), - _ctx.at(paddings_index).scale(), _ctx.at(paddings_index).zeroPoint())); + paddings_index, 
asTensorInfo(asTensorShape(_ctx.at(paddings_index).shape(), false), + _ctx.at(paddings_index).type(), _ctx.at(paddings_index).scale(), + _ctx.at(paddings_index).zeroPoint())); + + // initializer for padding + { + auto pad_type = _ctx.at(paddings_index).type(); + + if (pad_type == ANEURALNETWORKS_TENSOR_INT32) + { + auto pad_base = _ctx.at(paddings_index).data().base(); + auto pad_size = _ctx.at(paddings_index).data().size(); + auto pad_shape = _ctx.at(paddings_index).shape().asMatrix(); + + // Supported padding for height and width only. + auto initializer = std::bind(initMatrixTensor<int32_t>, _1, pad_shape, pad_base, pad_size); + _builder.addInitializer(paddings_index, initializer); + } + else + { + throw std::runtime_error("Only Int32 datatype is supported for Pad values"); + } + } // Construct operation parameters struct Param { int ofm_index; int ifm_index; - int32_t padding_size; + int padding_index; }; Param param; param.ofm_index = ofm_index.asInt(); param.ifm_index = ifm_index.asInt(); - - assert(_ctx.at(paddings_index).hasData() == true); - - // TODO: Currently we are supporting uniform padding for the tensor, so only a single - // value is being read. (TOP = BOTTOM = LEFT = RIGHT). 
- // Need to read padding values for all the sides (TOP, BOTTOM, LEFT & RIGHT) - - const auto &padding_data = _ctx.at(paddings_index).data(); - auto base = padding_data.base(); - auto padsize = reinterpret_cast<const int *>(base) + 3; - param.padding_size = *padsize; + param.padding_index = paddings_index.asInt(); auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index}); + auto pad_alloc = ctx.at(::internal::tflite::operand::Index{param.padding_index}); - auto fn = nnfw::make_unique<PadLayer>(); + if (from_env<bool>(std::getenv("USE_SIMPLE_PAD"))) + { + // USE CPU VERSION OF PADLAYER + auto rank = 4; + auto fn = nnfw::cpp14::make_unique<SimplePadLayer>(); - fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.padding_size); - builder.append("Pad", std::move(fn)); + fn->configure(ifm_alloc, ofm_alloc, pad_alloc, getARMComputeAxises(rank)); + builder.append("PAD", std::move(fn)); + } + else + { + if (::internal::arm_compute::isGpuMode()) // GPU + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPadLayerEx>(); + + fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), CAST_CL(pad_alloc)); + + builder.append("PAD", std::move(fn)); + } + else // NEON + { + // TODO Enable NEON Support + throw std::runtime_error("Not supported, yet"); + } + } }; _builder.addStage(stage); @@ -3506,6 +4280,21 @@ void Planner::visit(const ::internal::tflite::op::SpaceToDepth::Node &node) const ::internal::tflite::operand::Index input_index{node.param().input_index}; const ::internal::tflite::operand::Index block_size_index{node.param().block_size_index}; + const auto input_batch = _ctx.at(input_index).shape().dim(0); + const auto output_batch = _ctx.at(output_index).shape().dim(0); + const auto input_depth = _ctx.at(input_index).shape().dim(3); + const auto output_depth = 
_ctx.at(output_index).shape().dim(3); + const auto block_size = _ctx.at(block_size_index).asScalar<int32_t>(); + const auto input_height = _ctx.at(input_index).shape().dim(1); + const auto input_width = _ctx.at(input_index).shape().dim(2); + + // All assertions as per NNAPI specification. + assert(_ctx.at(input_index).shape().rank() == 4); + assert(_ctx.at(output_index).shape().rank() == 4); + assert((block_size >= 1) && (input_height % block_size == 0) && (input_width % block_size == 0)); + assert(input_batch == output_batch); + assert(input_depth * block_size * block_size == output_depth); + // Set Shape Constraints and TensorInfo _builder.addShapeConstr(output_index, asTensorInfo(asTensorShape(_ctx.at(output_index).shape(), false), @@ -3528,17 +4317,284 @@ void Planner::visit(const ::internal::tflite::op::SpaceToDepth::Node &node) param.output_index = output_index.asInt(); param.input_index = input_index.asInt(); - param.block_size = _ctx.at(block_size_index).asScalar<int32_t>(); + param.block_size = block_size; auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index}); auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index}); - auto rank = 4; - auto fn = nnfw::make_unique<SimpleSpaceToDepth>(); + if (from_env<bool>(std::getenv("USE_SIMPLE_SPACETODEPTH"))) + { + // USE CPU VERSION OF SPACETODEPTH + auto rank = 4; + auto fn = nnfw::cpp14::make_unique<SimpleSpaceToDepth>(); + + fn->configure(input_alloc, output_alloc, param.block_size, getARMComputeAxises(rank)); - fn->configure(input_alloc, output_alloc, param.block_size, getARMComputeAxises(rank)); - builder.append("SpaceToDepth", std::move(fn)); + builder.append("SpaceToDepth", std::move(fn)); + } + else + { + if (::internal::arm_compute::isGpuMode()) // GPU + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSpaceToDepth>(); + + fn->configure(CAST_CL(input_alloc), 
CAST_CL(output_alloc), param.block_size); + + builder.append("SpaceToDepth", std::move(fn)); + } + else // NEON + { + // TODO Enable NEON Support + throw std::runtime_error("Not supported, yet"); + } + } + }; + + _builder.addStage(stage); +} + +void Planner::visit(const ::internal::tflite::op::SpaceToBatchND::Node &node) +{ + const ::internal::tflite::operand::Index output_index{node.param().output_index}; + const ::internal::tflite::operand::Index input_index{node.param().input_index}; + const ::internal::tflite::operand::Index block_size_index{node.param().block_size_index}; + const ::internal::tflite::operand::Index padding_size_index{node.param().padding_size_index}; + + { // New block for assertions + + // Currently, only 4D NHWC input/output op_context are supported. + // The 4D array need to have exactly 2 spatial dimensions. + // TODO: Support arbitrary dimension in SpaceToBatchND. + assert(_ctx.at(input_index).shape().rank() == 4); + assert(_ctx.at(output_index).shape().rank() == 4); + assert(_ctx.at(block_size_index).shape().rank() == 1); + assert(_ctx.at(padding_size_index).shape().rank() == 2); + + const auto &output_shape = _ctx.at(output_index).shape(); + const auto &input_shape = _ctx.at(input_index).shape(); + const auto &block_size_shape = _ctx.at(block_size_index).shape(); + const auto &padding_size_shape = _ctx.at(padding_size_index).shape(); + + assert(output_shape.dim(3) == input_shape.dim(3)); + assert(block_size_shape.dim(0) == 2); + assert(padding_size_shape.dim(0) == 2); + assert(padding_size_shape.dim(1) == 2); + } + + // Set Shape Constraints and TensorInfo + _builder.addShapeConstr(output_index, + asTensorInfo(asTensorShape(_ctx.at(output_index).shape(), false), + _ctx.at(output_index).type(), _ctx.at(output_index).scale(), + _ctx.at(output_index).zeroPoint())); + _builder.addShapeConstr(input_index, + asTensorInfo(asTensorShape(_ctx.at(input_index).shape(), false), + _ctx.at(input_index).type(), _ctx.at(input_index).scale(), + 
_ctx.at(input_index).zeroPoint())); + + _builder.addShapeConstr(block_size_index, + asTensorInfo(asTensorShape(_ctx.at(block_size_index).shape()), + _ctx.at(block_size_index).type(), + _ctx.at(block_size_index).scale(), + _ctx.at(block_size_index).zeroPoint())); + + _builder.addShapeConstr(padding_size_index, + asTensorInfo(asTensorShape(_ctx.at(padding_size_index).shape()), + _ctx.at(padding_size_index).type(), + _ctx.at(padding_size_index).scale(), + _ctx.at(padding_size_index).zeroPoint())); + + if (_ctx.at(block_size_index).hasData()) + { + const auto rank = _ctx.at(input_index).shape().rank(); + const auto num_of_block_size = _ctx.at(block_size_index).shape().asVector(); + auto block_size_base = _ctx.at(block_size_index).data().base(); + auto block_size_type = _ctx.at(block_size_index).type(); + + switch (block_size_type) + { + case ANEURALNETWORKS_TENSOR_INT32: + { + auto initializer = [block_size_base, num_of_block_size, + rank](::arm_compute::ITensor &tensor) { + assert(num_of_block_size < 4); + for (size_t n = 0; n < num_of_block_size; ++n) + { + const int32_t *from = reinterpret_cast<const int32_t *>(block_size_base) + n; + int32_t *into = reinterpret_cast<int32_t *>( + tensor.ptr_to_element({ToARMComputeAxis(rank, n + 1).value()})); + *into = *from; + } + }; + _builder.addInitializer(block_size_index, initializer); + + break; + } + default: + { + throw std::runtime_error("Not supported"); + } + } + } + + if (_ctx.at(padding_size_index).hasData()) + { + const auto padding_size_shape = _ctx.at(padding_size_index).shape(); + const auto rank = _ctx.at(input_index).shape().rank(); + auto padding_size_base = _ctx.at(padding_size_index).data().base(); + auto padding_size_type = _ctx.at(padding_size_index).type(); + + switch (padding_size_type) + { + case ANEURALNETWORKS_TENSOR_INT32: + { + auto initializer = [padding_size_base, padding_size_shape, + rank](::arm_compute::ITensor &tensor) { + assert(padding_size_shape.dim(1) == 2); + 
assert(padding_size_shape.dim(0) < 4); + for (size_t n = 0; n < padding_size_shape.dim(0); ++n) + { + const int32_t *from = reinterpret_cast<const int32_t *>(padding_size_base) + + (n * padding_size_shape.dim(1)); + int32_t *into = reinterpret_cast<int32_t *>( + tensor.ptr_to_element({0, ToARMComputeAxis(rank, n + 1).value()})); + into[0] = from[0]; + into[1] = from[1]; + } + }; + _builder.addInitializer(padding_size_index, initializer); + break; + } + default: + { + throw std::runtime_error("Not supported"); + } + } + } + + // Construct operation parameters + struct Param + { + int output_index; + int input_index; + int block_size_index; + int padding_size_index; + int32_t rank; + }; + + Param param; + + param.output_index = output_index.asInt(); + param.input_index = input_index.asInt(); + param.block_size_index = block_size_index.asInt(); + param.padding_size_index = padding_size_index.asInt(); + param.rank = _ctx.at(input_index).shape().rank(); + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index}); + auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index}); + auto block_size_alloc = ctx.at(::internal::tflite::operand::Index{param.block_size_index}); + auto padding_size_alloc = ctx.at(::internal::tflite::operand::Index{param.padding_size_index}); + + // NOTE SimpleSpaceToBatchND is quite slow + if (from_env<bool>(std::getenv("USE_SIMPLE_SPACE_TO_BATCH_ND"))) + { + auto fn = nnfw::cpp14::make_unique<SimpleSpaceToBatchND>(); + + fn->configure(input_alloc, block_size_alloc, padding_size_alloc, output_alloc); + builder.append("SpaceToBatchND", std::move(fn)); + } + else if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLSpaceToBatchND>(); + + fn->configure(CAST_CL(input_alloc), CAST_CL(block_size_alloc), CAST_CL(padding_size_alloc), + CAST_CL(output_alloc)); + 
builder.append("SpaceToBatchND", std::move(fn)); + } + else + { + // TODO Enable NEON Support + throw std::runtime_error("Not supported, yet"); + } + }; + + _builder.addStage(stage); +} + +void Planner::visit(const ::internal::tflite::op::BatchToSpaceNd::Node &node) +{ + const ::internal::tflite::operand::Index output_index{node.param().output_index}; + const ::internal::tflite::operand::Index input_index{node.param().input_index}; + const ::internal::tflite::operand::Index block_size_index{node.param().block_size_index}; + + assert(_ctx.at(input_index).shape().rank() == 4); + assert(_ctx.at(output_index).shape().rank() == 4); + assert(_ctx.at(block_size_index).hasData() == true); + + const int32_t *block_size = + reinterpret_cast<const int32_t *>(_ctx.at(block_size_index).data().base()); + + const auto &output_shape = _ctx.at(output_index).shape(); + const auto &input_shape = _ctx.at(input_index).shape(); + + assert((_ctx.at(block_size_index).data().size() / sizeof(int32_t)) == 2 && block_size[0] > 0 && + block_size[1] > 0); + { + assert(output_shape.dim(3) == input_shape.dim(3)); + assert(output_shape.dim(1) == input_shape.dim(1) * block_size[0]); + assert(output_shape.dim(2) == input_shape.dim(2) * block_size[1]); + assert(output_shape.dim(0) == input_shape.dim(0) / (block_size[0] * block_size[1])); + } + + // Set Shape Constraints and TensorInfo + _builder.addShapeConstr( + output_index, asTensorInfo(asTensorShape(output_shape, false), _ctx.at(output_index).type(), + _ctx.at(output_index).scale(), _ctx.at(output_index).zeroPoint())); + _builder.addShapeConstr( + input_index, asTensorInfo(asTensorShape(input_shape, false), _ctx.at(input_index).type(), + _ctx.at(input_index).scale(), _ctx.at(input_index).zeroPoint())); + + // Construct operation parameters + struct Param + { + int output_index; + int input_index; + const int32_t *block_size; + int32_t rank; + }; + + Param param; + + param.output_index = output_index.asInt(); + param.input_index = 
input_index.asInt(); + param.block_size = block_size; + param.rank = _ctx.at(input_index).shape().rank(); + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index}); + auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index}); + + // NOTE SimpleBatchToSpaceND is quite slow, but may be useful for debugging + if (from_env<bool>(std::getenv("USE_SIMPLE_BATCH_TO_SPACE_ND"))) + { + auto fn = nnfw::cpp14::make_unique<SimpleBatchToSpaceND>(); + + fn->configure(input_alloc, output_alloc, param.block_size, getARMComputeAxises(param.rank)); + builder.append("BatchToSpaceND", std::move(fn)); + } + else if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLBatchToSpaceND>(); + + fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), param.block_size); + builder.append("BatchToSpaceND", std::move(fn)); + } + else + { + // TODO Enable NEON Support + throw std::runtime_error("Not supported, yet"); + } }; @@ -3550,9 +4606,6 @@ void Planner::visit(const ::internal::tflite::op::L2Normalization::Node &node) const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; - const auto ofm_shape = _ctx.at(ofm_index).shape(); - const auto ifm_shape = _ctx.at(ifm_index).shape(); - // Set Shape Constraints and TensorInfo _builder.addShapeConstr( ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), @@ -3583,10 +4636,10 @@ void Planner::visit(const ::internal::tflite::op::L2Normalization::Node &node) param.ofm_index = ofm_index.asInt(); param.ifm_index = ifm_index.asInt(); - param.radius = 2 * ifm_shape.dim(3) + 1; // normSize = depth * 2 + 1 - param.alpha = 1.0f; // In the implementation to make alpha_ become 1 - param.beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction) - 
param.bias = 0.0f; // Don't offset the reduction. + param.radius = 2 * _ctx.at(ifm_index).shape().dim(3) + 1; // normSize = depth * 2 + 1 + param.alpha = 1.0f; // In the implementation to make alpha_ become 1 + param.beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction) + param.bias = 0.0f; // Don't offset the reduction. auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); @@ -3598,7 +4651,7 @@ void Planner::visit(const ::internal::tflite::op::L2Normalization::Node &node) if (::internal::arm_compute::isGpuMode()) { - auto fn = nnfw::make_unique<::arm_compute::CLNormalizationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLNormalizationLayerEx>(); fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), norm_info); @@ -3606,9 +4659,9 @@ void Planner::visit(const ::internal::tflite::op::L2Normalization::Node &node) } else { - auto fn = nnfw::make_unique<::arm_compute::NENormalizationLayer>(); + auto fn = nnfw::cpp14::make_unique<::arm_compute::NENormalizationLayerEx>(); - fn->configure(CAST_NE(ifm_alloc), CAST_NE(ofm_alloc), norm_info); + fn->configure(ifm_alloc, ofm_alloc, norm_info); builder.append("L2Normalize", std::move(fn)); } @@ -3647,10 +4700,12 @@ void Planner::visit(const ::internal::tflite::op::L2Pool2D::Implicit::Node &node assert((ANEURALNETWORKS_PADDING_SAME == padding_type) || (ANEURALNETWORKS_PADDING_VALID == padding_type)); - _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), - _ctx.at(ofm_index).type())); - _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), - _ctx.at(ifm_index).type())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, 
asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); struct Param { @@ -3678,7 +4733,7 @@ void Planner::visit(const ::internal::tflite::op::L2Pool2D::Implicit::Node &node param.stride.horizontal = hstride; param.padding = (padding_type == ANEURALNETWORKS_PADDING_SAME) - ? same_padding(ifm_shape, ofm_shape, param.stride, kw, kh) + ? same_padding(ifm_shape, param.stride, kw, kh) : valid_padding(); param.activation = static_cast<FuseCode>(_ctx.at(activation_index).asScalar<int32_t>()); @@ -3731,9 +4786,6 @@ void Planner::visit(const ::internal::tflite::op::L2Pool2D::Explicit::Node &node const ::internal::tflite::operand::Index activation_index{node.param().activation_index}; - const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(); - const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(); - const int32_t kh = _ctx.at(kh_index).asScalar<int32_t>(); const int32_t kw = _ctx.at(kw_index).asScalar<int32_t>(); @@ -3745,10 +4797,12 @@ void Planner::visit(const ::internal::tflite::op::L2Pool2D::Explicit::Node &node const int32_t padding_top = _ctx.at(padding_top_index).asScalar<int32_t>(); const int32_t padding_bottom = _ctx.at(padding_bottom_index).asScalar<int32_t>(); - _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), - _ctx.at(ofm_index).type())); - _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), - _ctx.at(ifm_index).type())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); // Construct operation parameters struct Param @@ -3876,11 +4930,27 @@ void 
Planner::visit(const ::internal::tflite::op::EmbeddingLookup::Node &node) auto lookups_alloc = ctx.at(::internal::tflite::operand::Index{param.lookups_index}); auto values_alloc = ctx.at(::internal::tflite::operand::Index{param.values_index}); - auto fn = nnfw::make_unique<SimpleEmbeddingLookup>(); + if (from_env<bool>(std::getenv("USE_SIMPLE_EMBEDDINGLOOKUP"))) + { + auto fn = nnfw::cpp14::make_unique<SimpleEmbeddingLookup>(); - fn->configure(lookups_alloc, values_alloc, output_alloc); + fn->configure(lookups_alloc, values_alloc, output_alloc); - builder.append("EmbeddingLookup", std::move(fn)); + builder.append("EmbeddingLookup", std::move(fn)); + } + else if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLEmbeddingLookup>(); + + fn->configure(CAST_CL(values_alloc), CAST_CL(output_alloc), CAST_CL(lookups_alloc)); + + builder.append("EmbeddingLookup", std::move(fn)); + } + else + { + // TODO Enable NEON Support + throw std::runtime_error("Not supported, yet"); + } }; _builder.addStage(stage); @@ -3888,8 +4958,857 @@ void Planner::visit(const ::internal::tflite::op::EmbeddingLookup::Node &node) void Planner::visit(const ::internal::tflite::op::HashtableLookup::Node &node) { - // TODO Implement HashtableLookup - throw std::runtime_error("Not supported"); + const ::internal::tflite::operand::Index output_index{node.param().output_index}; + const ::internal::tflite::operand::Index hits_index{node.param().hits_index}; + const ::internal::tflite::operand::Index lookups_index{node.param().lookups_index}; + const ::internal::tflite::operand::Index values_index{node.param().values_index}; + const ::internal::tflite::operand::Index keys_index{node.param().keys_index}; + + const auto &lookups_obj = _ctx.at(lookups_index); + const auto &keys_obj = _ctx.at(keys_index); + const auto &hits_obj = _ctx.at(hits_index); + const auto &values_obj = _ctx.at(values_index); + const auto &output_obj = _ctx.at(output_index); + + 
assert(lookups_obj.type() == ANEURALNETWORKS_TENSOR_INT32); + assert(keys_obj.type() == ANEURALNETWORKS_TENSOR_INT32); + assert(hits_obj.type() == ANEURALNETWORKS_TENSOR_QUANT8_ASYMM); + + const auto &lookups_shape = lookups_obj.shape(); + const auto &keys_shape = keys_obj.shape(); + const auto &hits_shape = hits_obj.shape(); + const auto &values_shape = values_obj.shape(); + const auto &output_shape = output_obj.shape(); + + assert(values_shape.rank() == output_shape.rank()); + + assert(lookups_shape.rank() == 1); + assert(keys_shape.rank() == 1); + assert(values_shape.dim(0) == keys_shape.dim(0)); + assert(lookups_shape.dim(0) == output_shape.dim(0)); + + _builder.addShapeConstr(output_index, + asTensorInfo(asTensorShape(_ctx.at(output_index).shape()), + _ctx.at(output_index).type(), _ctx.at(output_index).scale(), + _ctx.at(output_index).zeroPoint())); + _builder.addShapeConstr(hits_index, + asTensorInfo(asTensorShape(_ctx.at(hits_index).shape()), + _ctx.at(hits_index).type(), _ctx.at(hits_index).scale(), + _ctx.at(hits_index).zeroPoint())); + + _builder.addShapeConstr(lookups_index, asTensorInfo(asTensorShape(_ctx.at(lookups_index).shape()), + _ctx.at(lookups_index).type(), + _ctx.at(lookups_index).scale(), + _ctx.at(lookups_index).zeroPoint())); + _builder.addShapeConstr(values_index, + asTensorInfo(asTensorShape(_ctx.at(values_index).shape()), + _ctx.at(values_index).type(), _ctx.at(values_index).scale(), + _ctx.at(values_index).zeroPoint())); + _builder.addShapeConstr(keys_index, + asTensorInfo(asTensorShape(_ctx.at(keys_index).shape()), + _ctx.at(keys_index).type(), _ctx.at(keys_index).scale(), + _ctx.at(keys_index).zeroPoint())); + + // Construct operation parameters + struct Param + { + int32_t output_index; + int32_t hits_index; + int32_t lookups_index; + int32_t values_index; + int32_t keys_index; + }; + + Param param; + + param.output_index = output_index.asInt(); + param.hits_index = hits_index.asInt(); + param.lookups_index = lookups_index.asInt(); + 
param.values_index = values_index.asInt(); + param.keys_index = keys_index.asInt(); + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index}); + auto hits_alloc = ctx.at(::internal::tflite::operand::Index{param.hits_index}); + auto lookups_alloc = ctx.at(::internal::tflite::operand::Index{param.lookups_index}); + auto values_alloc = ctx.at(::internal::tflite::operand::Index{param.values_index}); + auto keys_alloc = ctx.at(::internal::tflite::operand::Index{param.keys_index}); + + if (from_env<bool>(std::getenv("USE_SIMPLE_HASHTABLELOOKUP"))) + { + auto fn = nnfw::cpp14::make_unique<SimpleHashtableLookupLayer>(); + + fn->configure(lookups_alloc, keys_alloc, values_alloc, output_alloc, hits_alloc); + + builder.append("HashtableLookup", std::move(fn)); + } + else if (::internal::arm_compute::isGpuMode()) // GPU + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLHashtableLookup>(); + + fn->configure(CAST_CL(lookups_alloc), CAST_CL(keys_alloc), CAST_CL(values_alloc), + CAST_CL(output_alloc), CAST_CL(hits_alloc)); + + builder.append("HashtableLookup", std::move(fn)); + } + else // NEON + { + // TODO Enable NEON Support + throw std::runtime_error("Not supported, yet"); + } + }; + + _builder.addStage(stage); +} + +void Planner::visit(const ::internal::tflite::op::LocalResponseNormalization::Node &node) +{ + const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; + const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; + const ::internal::tflite::operand::Index radius_index{node.param().radius_index}; + const ::internal::tflite::operand::Index bias_index{node.param().bias_index}; + const ::internal::tflite::operand::Index alpha_index{node.param().alpha_index}; + const ::internal::tflite::operand::Index beta_index{node.param().beta_index}; + + // Set Shape Constraints and TensorInfo + _builder.addShapeConstr( + 
ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + + // Construct operation parameters + struct Param + { + int ofm_index; + int ifm_index; + int32_t radius; + float bias; + float alpha; + float beta; + }; + + Param param; + + param.ofm_index = ofm_index.asInt(); + param.ifm_index = ifm_index.asInt(); + + param.radius = _ctx.at(radius_index).asScalar<int32_t>(); + param.alpha = _ctx.at(alpha_index).asScalar<float>(); + param.beta = _ctx.at(beta_index).asScalar<float>(); + param.bias = _ctx.at(bias_index).asScalar<float>(); + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); + auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index}); + + const auto norm_info = + ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP, param.radius, + param.alpha, param.beta, param.bias, false); + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLNormalizationLayerEx>(); + + fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), norm_info); + + builder.append("LocalResponseNormalization", std::move(fn)); + } + else + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::NENormalizationLayerEx>(); + + fn->configure(ifm_alloc, ofm_alloc, norm_info); + + builder.append("LocalResponseNormalization", std::move(fn)); + } + }; + + _builder.addStage(stage); +} + +void Planner::visit(const ::internal::tflite::op::DepthToSpace::Node &node) +{ + const ::internal::tflite::operand::Index output_index{node.param().output_index}; + const ::internal::tflite::operand::Index input_index{node.param().input_index}; + 
const ::internal::tflite::operand::Index block_size_index{node.param().block_size_index}; + + assert(_ctx.at(input_index).shape().rank() == 4); + assert(_ctx.at(output_index).shape().rank() == 4); + + int32_t block_size = _ctx.at(block_size_index).asScalar<int32_t>(); + assert(block_size > 0); + + { // assertions block + const auto output_shape = _ctx.at(output_index).shape(); + const auto input_shape = _ctx.at(input_index).shape(); + assert(output_shape.dim(0) == input_shape.dim(0)); + assert(output_shape.dim(1) == input_shape.dim(1) * block_size); + assert(output_shape.dim(2) == input_shape.dim(2) * block_size); + assert(input_shape.dim(3) % (block_size * block_size) == 0); + assert(output_shape.dim(3) == input_shape.dim(3) / (block_size * block_size)); + } + + // Set Shape Constraints and TensorInfo + _builder.addShapeConstr(output_index, + asTensorInfo(asTensorShape(_ctx.at(output_index).shape(), false), + _ctx.at(output_index).type(), _ctx.at(output_index).scale(), + _ctx.at(output_index).zeroPoint())); + _builder.addShapeConstr(input_index, + asTensorInfo(asTensorShape(_ctx.at(input_index).shape(), false), + _ctx.at(input_index).type(), _ctx.at(input_index).scale(), + _ctx.at(input_index).zeroPoint())); + + // Construct operation parameters + struct Param + { + int output_index; + int input_index; + int32_t block_size; + }; + + Param param; + + param.output_index = output_index.asInt(); + param.input_index = input_index.asInt(); + param.block_size = block_size; + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index}); + auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.input_index}); + + if (from_env<bool>(std::getenv("USE_SIMPLE_DEPTHTOSPACE"))) + { + // USE CPU VERSION OF DEPTHTOSPACE + auto rank = 4; + auto fn = nnfw::cpp14::make_unique<SimpleDepthToSpace>(); + + fn->configure(input_alloc, output_alloc, param.block_size, 
getARMComputeAxises(rank)); + + builder.append("DepthToSpace", std::move(fn)); + } + else + { + if (::internal::arm_compute::isGpuMode()) // GPU + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthToSpace>(); + + fn->configure(CAST_CL(input_alloc), CAST_CL(output_alloc), param.block_size); + + builder.append("DepthToSpace", std::move(fn)); + } + else // NEON + { + // TODO Enable NEON Support + throw std::runtime_error("Not supported, yet"); + } + } + }; + + _builder.addStage(stage); +} + +void Planner::visit(const ::internal::tflite::op::Unpack::Node &node) +{ + VERBOSE(Unpack) << "Configure Unpack operation" << std::endl; + const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; + uint32_t input_rank = _ctx.at(ifm_index).shape().rank(); + + assert(input_rank == 4 || input_rank == 3 || input_rank == 2); + _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), + _ctx.at(ifm_index).type())); + + int32_t axis = + _ctx.at(::internal::tflite::operand::Index{node.param().axis_index}).asScalar<int32_t>(); + // int32_t num_split = + // _ctx.at(::internal::tflite::operand::Index{node.param().num_split_index}).asScalar<int32_t>(); + + for (const auto &index : node.param().ofm_indexes) + { + const ::internal::tflite::operand::Index ofm_index{index}; + _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), + _ctx.at(ofm_index).type())); + } + + struct Param + { + std::vector<int32_t> ofm_indexes; + int ifm_index; + int axis; + }; + + if (input_rank == 4) + { + Param param; + param.ifm_index = ifm_index.asInt(); + param.axis = axis; + for (const auto &index : node.param().ofm_indexes) + { + param.ofm_indexes.push_back(index); + } + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto input_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index}); + + if (::internal::arm_compute::isGpuMode()) + { + auto fn = 
nnfw::cpp14::make_unique<SimpleUnpackLayer>(); + std::vector<::arm_compute::ICLTensor *> outputs; + for (const auto &index : param.ofm_indexes) + { + auto output_alloc = ctx.at(::internal::tflite::operand::Index{index}); + outputs.push_back(CAST_CL(output_alloc)); + } + fn->configure(CAST_CL(input_alloc), outputs, param.axis); + + builder.append("Unpack", std::move(fn)); + } + else + throw std::runtime_error("Not supported, yet"); + }; + + _builder.addStage(stage); + } + else if (input_rank == 3) + { + // TODO: generate test case for this and generalize 4D method all cases. + throw std::runtime_error("UNPACK_3D not implemented"); + } + else if (input_rank == 2) + { + throw std::runtime_error("UNPACK_2D not implemented"); + } + else + { + throw std::runtime_error("UNPACK axis is not valid"); + } +} + +void Planner::visit(const ::internal::tflite::op::Pack::Node &node) +{ + VERBOSE(Pack) << "Configure Pack operation" << std::endl; + + const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; + const uint32_t output_rank = _ctx.at(ofm_index).shape().rank(); + const uint32_t input_rank = output_rank - 1; + + assert(output_rank == 4 || output_rank == 3 || output_rank == 2); + + for (const auto &index : node.param().ifm_indexes) + { + const ::internal::tflite::operand::Index ifm_index{index}; + assert(_ctx.at(ifm_index).shape().rank() == input_rank); + _builder.addShapeConstr(ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), + _ctx.at(ifm_index).type())); + } + + _builder.addShapeConstr(ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), + _ctx.at(ofm_index).type())); + + int32_t axis = + _ctx.at(::internal::tflite::operand::Index{node.param().axis_index}).asScalar<int32_t>(); + + struct Param + { + std::vector<int32_t> ifm_indexes; + int ofm_index; + int axis; + }; + + if (input_rank == 3) + { + Param param; + param.ofm_index = ofm_index.asInt(); + param.axis = axis; + + // TODO: Fix this once all permutations are 
present. + if (param.axis != 0) + { + throw std::runtime_error("This axis not supported, some 4D permutations are missing"); + } + + for (const auto &index : node.param().ifm_indexes) + { + param.ifm_indexes.push_back(index); + } + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); + + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<SimplePackLayer>(); + std::vector<::arm_compute::ICLTensor *> inputs; + for (const auto &index : param.ifm_indexes) + { + auto input_alloc = ctx.at(::internal::tflite::operand::Index{index}); + inputs.push_back(CAST_CL(input_alloc)); + } + fn->configure(inputs, CAST_CL(output_alloc), param.axis); + + builder.append("Pack", std::move(fn)); + } + else + throw std::runtime_error("Not supported, yet"); + }; + + _builder.addStage(stage); + } + else if (input_rank == 2) + { + // TODO: generate test case for this and generalize 4D method all cases. 
+ throw std::runtime_error("PACK_2D not implemented"); + } + else if (input_rank == 1) + { + throw std::runtime_error("PACK_1D not implemented"); + } + else + { + throw std::runtime_error("PACK axis is not valid"); + } +} + +void Planner::visit(const ::internal::tflite::op::Neg::Node &node) +{ + const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; + const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; + + // Set Shape Constraints and TensorInfo + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); + + // Construct operation parameters + struct Param + { + int ofm_index; + int ifm_index; + }; + + Param param; + + param.ofm_index = ofm_index.asInt(); + param.ifm_index = ifm_index.asInt(); + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); + auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index}); + + // NOTE SimpleNeg is quite slow, but may be useful for debugging + if (from_env<bool>(std::getenv("USE_SIMPLE_NEG"))) + { + auto fn = nnfw::cpp14::make_unique<SimpleNeg>(); + + fn->configure(ifm_alloc, ofm_alloc); + builder.append("Neg", std::move(fn)); + } + else if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLNeg>(); + + fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc)); + builder.append("Neg", std::move(fn)); + } + else + { + // TODO Enable NEON Support + throw std::runtime_error("Not supported, yet"); + } + + }; + _builder.addStage(stage); +} + +void Planner::visit(const ::internal::tflite::op::Exp::Node &node) +{ + 
VERBOSE(Exp) << "Configure Exp operation" << std::endl; + + const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; + const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; + + // Set shape constraints + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); + + struct Param + { + int ofm_index; + int ifm_index; + }; + + Param param; + + param.ofm_index = ofm_index.asInt(); + param.ifm_index = ifm_index.asInt(); + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); + auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index}); + + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLExp>(); + + fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc)); + + builder.append("Exp", std::move(fn)); + } + else + { + throw std::runtime_error("Not supported"); + } + }; + + _builder.addStage(stage); +} + +void Planner::visit(const ::internal::tflite::op::ReduceSum::Node &node) +{ + VERBOSE(ReduceSum) << "Configure ReduceSum operation" << std::endl; + + const ::internal::tflite::operand::Index ofm_index{node.param().ofm_index}; + const ::internal::tflite::operand::Index ifm_index{node.param().ifm_index}; + const ::internal::tflite::operand::Index axis_index{node.param().axis_index}; + + const auto ifm_shape = _ctx.at(ifm_index).shape(); + const auto ofm_shape = _ctx.at(ofm_index).shape(); + const auto axis_shape = _ctx.at(axis_index).shape(); + + assert(ifm_shape.rank() <= 4); + assert(ofm_shape.rank() <= ifm_shape.rank()); + 
assert(_ctx.at(axis_index).hasData()); + assert(axis_shape.rank() == 0 || axis_shape.rank() == 1); + + // NOTE For the 4-dimensions, if the rank of input and output are different, this runtime only + // supports cases reducing height and width or reducing depth. + // TODO We have to support all cases of dimensions up to 4. + // For correct permuting, we have to set output's shape to be equal in dimension position of the + // input. But the positions of the same dimensions in the input and output may be set differently. + // For example {2,3,4,5}(input's shape) can be reduced to {3,5}(output's shape). The original + // output shape should be {1,3,1,5}, but real output shape may be {3,5}. If you simply try to + // extend it in 4 dimensions, it should be {1,1,3,5}. + // Even if output shape is changed to {1,3,1,5}, there is another problem. It is that shape of + // output tensor used at next operation is changed to {1,3,1,5} after this operation even if the + // next operation is not desired. 
+ if (ifm_shape.rank() == 4 && ifm_shape.rank() != ofm_shape.rank()) + { + if (ofm_shape.rank() == 2) + { + // Reducing HW + assert(ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(3) == ofm_shape.dim(1)); + } + else if (ofm_shape.rank() == 3) + { + // Reducing C or + // (Reducing H and C(ifm and ofm) == 1) or (Reducing W and C(ifm and ofm) == 1) + assert((ifm_shape.dim(0) == ofm_shape.dim(0) && ifm_shape.dim(1) == ofm_shape.dim(1) && + ifm_shape.dim(2) == ofm_shape.dim(2)) || + (ifm_shape.dim(0) == ofm_shape.dim(0) && + (ifm_shape.dim(1) == ofm_shape.dim(1) || ifm_shape.dim(2) == ofm_shape.dim(1)) && + ifm_shape.dim(3) == 1 && ofm_shape.dim(2) == 1)); + } + } + + // Set shape constraints + _builder.addShapeConstr( + ofm_index, asTensorInfo(asTensorShape(_ctx.at(ofm_index).shape()), _ctx.at(ofm_index).type(), + _ctx.at(ofm_index).scale(), _ctx.at(ofm_index).zeroPoint())); + _builder.addShapeConstr( + ifm_index, asTensorInfo(asTensorShape(_ctx.at(ifm_index).shape()), _ctx.at(ifm_index).type(), + _ctx.at(ifm_index).scale(), _ctx.at(ifm_index).zeroPoint())); + + uint32_t input_rank = ifm_shape.rank(); + std::set<uint32_t> axis; + int32_t axis_rank = axis_shape.rank(); + + if (axis_rank == 0) + { + int32_t axis_value = _ctx.at(axis_index).asScalar<int32_t>(); + if (axis_value < 0) + { + axis_value += input_rank; + } + axis.insert(ToARMComputeAxis(input_rank, axis_value).value()); + } + else if (axis_rank == 1) + { + const auto axis_base = _ctx.at(axis_index).data().base(); + const auto axis_size = _ctx.at(axis_index).shape().asVector(); + + // If axis's data does not exist as constant values and can be gotten as input data, we have to + // find a way to infer output shape when sinking output. 
+ assert(axis_base != nullptr); + for (uint32_t n = 0; n < axis_size; ++n) + { + int32_t axis_value = *(reinterpret_cast<const int32_t *>(axis_base) + n); + if (axis_value < 0) + { + axis_value += input_rank; + } + axis.insert(ToARMComputeAxis(input_rank, axis_value).value()); + } + } + else + { + throw std::runtime_error("Not supported axis"); + } + + struct Param + { + int ofm_index; + int ifm_index; + std::set<uint32_t> axis; + }; + + Param param; + + param.ofm_index = ofm_index.asInt(); + param.ifm_index = ifm_index.asInt(); + param.axis = axis; + + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto ofm_alloc = ctx.at(::internal::tflite::operand::Index{param.ofm_index}); + auto ifm_alloc = ctx.at(::internal::tflite::operand::Index{param.ifm_index}); + + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLReduceOperation>(); + + fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc), param.axis, + ::arm_compute::ReduceOperation::SUM); + + builder.append("ReduceSum", std::move(fn)); + } + else + throw std::runtime_error("Not supported, yet"); + }; + + _builder.addStage(stage); +} + +void Planner::visit(const ::internal::tflite::op::Abs::Node &node) +{ + // TODO Implement Abs op + throw std::runtime_error("Not supported yet"); +} + +void Planner::visit(const ::internal::tflite::op::NotEqual::Node &node) +{ + const ::internal::tflite::operand::Index output_index{node.param().output_index}; + const ::internal::tflite::operand::Index input1_index{node.param().input1_index}; + const ::internal::tflite::operand::Index input2_index{node.param().input2_index}; + + // Set Shape Constraints and TensorInfo + _builder.addShapeConstr(output_index, + asTensorInfo(asTensorShape(_ctx.at(output_index).shape()), + _ctx.at(output_index).type(), _ctx.at(output_index).scale(), + _ctx.at(output_index).zeroPoint())); + + if (!(_ctx.at(input1_index).shape() == _ctx.at(input2_index).shape())) + { + const 
auto broadcast_rank = + std::max(_ctx.at(input1_index).shape().rank(), _ctx.at(input2_index).shape().rank()); + const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input1_index).shape()) + .extendRank(broadcast_rank); + const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input2_index).shape()) + .extendRank(broadcast_rank); + } + _builder.addShapeConstr(input1_index, + asTensorInfo(asTensorShape(_ctx.at(input1_index).shape()), + _ctx.at(input1_index).type(), _ctx.at(input1_index).scale(), + _ctx.at(input1_index).zeroPoint())); + _builder.addShapeConstr(input2_index, + asTensorInfo(asTensorShape(_ctx.at(input2_index).shape()), + _ctx.at(input2_index).type(), _ctx.at(input2_index).scale(), + _ctx.at(input2_index).zeroPoint())); + + // Construct operation parameters + struct Param + { + int output_index; + int input1_index; + int input2_index; + }; + + Param param; + + param.output_index = output_index.asInt(); + param.input1_index = input1_index.asInt(); + param.input2_index = input2_index.asInt(); + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index}); + auto input1_alloc = ctx.at(::internal::tflite::operand::Index{param.input1_index}); + auto input2_alloc = ctx.at(::internal::tflite::operand::Index{param.input2_index}); + + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLComparisonOp>(); + + fn->configure(CAST_CL(input1_alloc), CAST_CL(input2_alloc), CAST_CL(output_alloc), + ::arm_compute::ComparisonOperation::NOT_EQUAL); + + builder.append("NotEqual", std::move(fn)); + } + else + { + // TODO Add NEON support + + throw std::runtime_error("Not supported yet"); + } + }; + _builder.addStage(stage); +} + +void Planner::visit(const ::internal::tflite::op::LogicalAnd::Node &node) +{ + VERBOSE(Logical_AND) << "Configure Logical_AND operation" << std::endl; + + const 
::internal::tflite::operand::Index output_index{node.param().output_index}; + const ::internal::tflite::operand::Index input1_index{node.param().input1_index}; + const ::internal::tflite::operand::Index input2_index{node.param().input2_index}; + + // Set Shape Constraints and TensorInfo + _builder.addShapeConstr(output_index, + asTensorInfo(asTensorShape(_ctx.at(output_index).shape()), + _ctx.at(output_index).type(), _ctx.at(output_index).scale(), + _ctx.at(output_index).zeroPoint())); + + if (!(_ctx.at(input1_index).shape() == _ctx.at(input2_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(input1_index).shape().rank(), _ctx.at(input2_index).shape().rank()); + const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input1_index).shape()) + .extendRank(broadcast_rank); + const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input2_index).shape()) + .extendRank(broadcast_rank); + } + _builder.addShapeConstr(input1_index, + asTensorInfo(asTensorShape(_ctx.at(input1_index).shape()), + _ctx.at(input1_index).type(), _ctx.at(input1_index).scale(), + _ctx.at(input1_index).zeroPoint())); + _builder.addShapeConstr(input2_index, + asTensorInfo(asTensorShape(_ctx.at(input2_index).shape()), + _ctx.at(input2_index).type(), _ctx.at(input2_index).scale(), + _ctx.at(input2_index).zeroPoint())); + + // Construct operation parameters + struct Param + { + int output_index; + int input1_index; + int input2_index; + }; + + Param param; + + param.output_index = output_index.asInt(); + param.input1_index = input1_index.asInt(); + param.input2_index = input2_index.asInt(); + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index}); + auto input1_alloc = ctx.at(::internal::tflite::operand::Index{param.input1_index}); + auto input2_alloc = ctx.at(::internal::tflite::operand::Index{param.input2_index}); + + if (::internal::arm_compute::isGpuMode()) + { + auto 
fn = nnfw::cpp14::make_unique<::arm_compute::CLBinaryLogicalOp>(); + + fn->configure(CAST_CL(input1_alloc), CAST_CL(input2_alloc), CAST_CL(output_alloc), + ::arm_compute::BinaryLogicalOperation::AND); + + builder.append("LogicalAnd", std::move(fn)); + } + else + { + // TODO Add NEON support + + throw std::runtime_error("Not supported yet"); + } + }; + _builder.addStage(stage); +} + +void Planner::visit(const ::internal::tflite::op::LogicalNot::Node &node) +{ + // TODO Implement LogicalNot op + throw std::runtime_error("Not supported yet"); +} + +void Planner::visit(const ::internal::tflite::op::LogicalOr::Node &node) +{ + VERBOSE(LogicalOr) << "Configure LogicalOr operation" << std::endl; + + const ::internal::tflite::operand::Index output_index{node.param().output_index}; + const ::internal::tflite::operand::Index input1_index{node.param().input1_index}; + const ::internal::tflite::operand::Index input2_index{node.param().input2_index}; + + // Set Shape Constraints and TensorInfo + _builder.addShapeConstr(output_index, + asTensorInfo(asTensorShape(_ctx.at(output_index).shape()), + _ctx.at(output_index).type(), _ctx.at(output_index).scale(), + _ctx.at(output_index).zeroPoint())); + + if (!(_ctx.at(input1_index).shape() == _ctx.at(input2_index).shape())) + { + const auto broadcast_rank = + std::max(_ctx.at(input1_index).shape().rank(), _ctx.at(input2_index).shape().rank()); + const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input1_index).shape()) + .extendRank(broadcast_rank); + const_cast<::internal::tflite::operand::Shape &>(_ctx.at(input2_index).shape()) + .extendRank(broadcast_rank); + } + + _builder.addShapeConstr(input1_index, + asTensorInfo(asTensorShape(_ctx.at(input1_index).shape()), + _ctx.at(input1_index).type(), _ctx.at(input1_index).scale(), + _ctx.at(input1_index).zeroPoint())); + _builder.addShapeConstr(input2_index, + asTensorInfo(asTensorShape(_ctx.at(input2_index).shape()), + _ctx.at(input2_index).type(), _ctx.at(input2_index).scale(), + 
_ctx.at(input2_index).zeroPoint())); + + // Construct operation parameters + struct Param + { + int output_index; + int input1_index; + int input2_index; + }; + + Param param; + + param.output_index = output_index.asInt(); + param.input1_index = input1_index.asInt(); + param.input2_index = input2_index.asInt(); + auto stage = [param](const IAllocationContext &ctx, IExecutionBuilder &builder) { + auto output_alloc = ctx.at(::internal::tflite::operand::Index{param.output_index}); + auto input1_alloc = ctx.at(::internal::tflite::operand::Index{param.input1_index}); + auto input2_alloc = ctx.at(::internal::tflite::operand::Index{param.input2_index}); + if (::internal::arm_compute::isGpuMode()) + { + auto fn = nnfw::cpp14::make_unique<::arm_compute::CLBinaryLogicalOp>(); + + fn->configure(CAST_CL(input1_alloc), CAST_CL(input2_alloc), CAST_CL(output_alloc), + ::arm_compute::BinaryLogicalOperation::OR); + + builder.append("LogicalOr", std::move(fn)); + } + else + { + // TODO Add NEON support + + throw std::runtime_error("Not supported yet"); + } + }; + _builder.addStage(stage); } class AllocationContext final : public IAllocationContext @@ -3939,32 +5858,69 @@ private: ::internal::arm_compute::Plan &_plan; }; +/** + * @brief Class to provide methods of compilation plan builder + */ class PlanBuilder final : public IPlanBuilder { public: + /** + * @brief Construct a new PlanBuilder object with Plan + * @param [in] plan The Plan object + */ PlanBuilder(::internal::arm_compute::Plan &plan) : _plan{plan} { // DO NOTHING } public: + /** + * @brief Add TensorInfo with Shape Constraints + * @param [in] ind Index of operand + * @param [in] info TensorInfo value to set to index of operand + * @return N/A + */ void addShapeConstr(const ::internal::tflite::operand::Index &ind, const ::arm_compute::TensorInfo &info) override; public: + /** + * @brief Add Subsumption constraints + * @param [in] ind Index of operand + * @param [in] base Index of base operand of Subsumption + * @param 
[in] offset Offset of Subsumption + * @param [in] shape Shape of Subsumption + * @param [in] extend_parent extend_parent value of Subsumption + * @return N/A + */ void addSubsumptionConstr(const ::internal::tflite::operand::Index &ind, const ::internal::tflite::operand::Index &base, const ::arm_compute::Coordinates &offset, const ::arm_compute::TensorShape &shape, bool extend_parent) override; public: + /** + * @brief Add Initializer lambda with ITensor param + * @param [in] ind Index of operand + * @param [in] initializer Initializer to add + * @return N/A + */ void addInitializer(const ::internal::tflite::operand::Index &ind, const Initializer &initializer) override; public: + /** + * @brief Add Stage lambda with IAllocationContext and IExecutionBuilder params + * @param [in] stage Stage to add + * @return N/A + */ void addStage(const Stage &stage) override; public: + /** + * @brief Finilize(build) the Plan + * @return N/A + */ void finalize(void) const; private: @@ -4197,6 +6153,8 @@ void PlanBuilder::finalize(void) const auto type = operands.at(operand_idx).type(); auto shape = operands.at(operand_idx).shape(); + // Need to support scalar types (ANEURALNETWORKS_FLOAT32 and ANEURALNETWORKS_INT32) + // for rank > 1 tensor, because it can be operand of broadcast operation switch (rank) { case 0: // scalar @@ -4240,12 +6198,14 @@ void PlanBuilder::finalize(void) const auto size = shape.asVector(); switch (type) { + case ANEURALNETWORKS_FLOAT32: case ANEURALNETWORKS_TENSOR_FLOAT32: { auto initializer = std::bind(initVectorTensor<float>, _1, base, size); _plan.operands().at(operand_idx).access(initializer); break; } + case ANEURALNETWORKS_INT32: case ANEURALNETWORKS_TENSOR_INT32: { auto initializer = std::bind(initVectorTensor<int32_t>, _1, base, size); @@ -4270,12 +6230,14 @@ void PlanBuilder::finalize(void) const auto size = operands.at(operand_idx).data().size(); switch (type) { + case ANEURALNETWORKS_FLOAT32: case ANEURALNETWORKS_TENSOR_FLOAT32: { auto 
initializer = std::bind(initMatrixTensor<float>, _1, matrix_shape, base, size); _plan.operands().at(operand_idx).access(initializer); break; } + case ANEURALNETWORKS_INT32: case ANEURALNETWORKS_TENSOR_INT32: { auto initializer = std::bind(initMatrixTensor<int32_t>, _1, matrix_shape, base, size); @@ -4300,12 +6262,14 @@ void PlanBuilder::finalize(void) const auto size = operands.at(operand_idx).data().size(); switch (type) { + case ANEURALNETWORKS_FLOAT32: case ANEURALNETWORKS_TENSOR_FLOAT32: { auto initializer = std::bind(initTensor3D<float>, _1, tensor_shape, base, size); _plan.operands().at(operand_idx).access(initializer); break; } + case ANEURALNETWORKS_INT32: case ANEURALNETWORKS_TENSOR_INT32: { auto initializer = std::bind(initTensor3D<int32_t>, _1, tensor_shape, base, size); @@ -4330,12 +6294,14 @@ void PlanBuilder::finalize(void) const auto size = operands.at(operand_idx).data().size(); switch (type) { + case ANEURALNETWORKS_FLOAT32: case ANEURALNETWORKS_TENSOR_FLOAT32: { auto initializer = std::bind(initFeatureTensor<float>, _1, feature_shape, base, size); _plan.operands().at(operand_idx).access(initializer); break; } + case ANEURALNETWORKS_INT32: case ANEURALNETWORKS_TENSOR_INT32: { auto initializer = @@ -4417,8 +6383,10 @@ int ANeuralNetworksCompilation_finish(ANeuralNetworksCompilation *compilation) if (::internal::arm_compute::isGpuMode()) { arm_compute::CLScheduler::get().default_init(); - arm_compute::CLKernelLibraryEx::get().init("./cl_kernels/", cl::Context::getDefault(), - cl::Device::getDefault()); + // NOTE CLKernelLibraryEx must use the same context as CLScheduler + // It did not check whether another device is available. 
+ arm_compute::CLKernelLibraryEx::get().init( + "./cl_kernels/", arm_compute::CLScheduler::get().context(), cl::Device::getDefault()); } const auto &operands = compilation->plan().model().operands(); diff --git a/runtimes/pure_arm_compute/src/compilation.h b/runtimes/pure_arm_compute/src/compilation.h index dd3613b2d..1a06d06b9 100644 --- a/runtimes/pure_arm_compute/src/compilation.h +++ b/runtimes/pure_arm_compute/src/compilation.h @@ -14,15 +14,28 @@ * limitations under the License. */ +/** + * @file compilation.h + * @brief This file defines ANeuralNetworksCompilation class for handling Compilation NNAPI + * @ingroup COM_AI_RUNTIME + */ + #ifndef __COMPILATION_H__ #define __COMPILATION_H__ #include "internal/Model.h" #include "internal/arm_compute.h" +/** + * @brief struct to define Compilation of NNAPI + */ struct ANeuralNetworksCompilation { public: + /** + * @brief Construct with params + * @param [in] model Pointer of internal::tflite::Model to set internal::arm_compute::Plan + */ ANeuralNetworksCompilation(const std::shared_ptr<const internal::tflite::Model> &model) : _plan{new internal::arm_compute::Plan{model}} { @@ -30,11 +43,28 @@ public: } public: + /** + * @brief Get reference of internal::arm_compute::Plan + * @return Reference of internal::arm_compute::Plan + */ internal::arm_compute::Plan &plan(void) { return *_plan; } public: + /** + * @brief Publish internal Plan to param + * @param [out] plan Pointer of internal::arm_compute::Plan to be set + * @return N/A + */ void publish(std::shared_ptr<const internal::arm_compute::Plan> &plan) { plan = _plan; } + /** + * @brief Get @c true if ANeuralNetworksCompilation_finish has been called, otherwise @c false + * @return @c true if ANeuralNetworksCompilation_finish has been called, otherwise @c false + */ bool isFinished(void) { return _isFinished; } + /** + * @brief Mark compilation process finished + * @return N/A + */ void markAsFinished() { _isFinished = true; } private: diff --git 
a/runtimes/pure_arm_compute/src/event.h b/runtimes/pure_arm_compute/src/event.h index 5d41dca84..b5595583c 100644 --- a/runtimes/pure_arm_compute/src/event.h +++ b/runtimes/pure_arm_compute/src/event.h @@ -14,9 +14,18 @@ * limitations under the License. */ +/** + * @file event.h + * @brief This file defines ANeuralNetworksEvent struct for handling Event NNAPI + * @ingroup COM_AI_RUNTIME + */ + #ifndef __EVENT_H__ #define __EVENT_H__ +/** + * @brief struct to define Event of NNAPI + */ struct ANeuralNetworksEvent { }; diff --git a/runtimes/pure_arm_compute/src/execution.cc b/runtimes/pure_arm_compute/src/execution.cc index 778a22155..b7eba1cef 100644 --- a/runtimes/pure_arm_compute/src/execution.cc +++ b/runtimes/pure_arm_compute/src/execution.cc @@ -18,8 +18,8 @@ #include "compilation.h" #include "execution.h" -#include "util/profiling/profiling.h" -#include "util/profiling/profiler.h" +#include "profiling/profiling.h" +#include "profiling/profiler.h" #include "event.h" #include "internal/VectorSource.h" @@ -34,7 +34,7 @@ #include "internal/Tensor3DSink.h" #include "internal/FeatureSink.h" -#include "util/feature/IndexIterator.h" +#include "misc/feature/IndexIterator.h" #include <arm_compute/runtime/CL/CLScheduler.h> @@ -70,7 +70,7 @@ static void asVectorSource(ANeuralNetworksExecution *execution, int32_t type, in } static void asMatrixSource(ANeuralNetworksExecution *execution, int32_t type, int32_t index, - const nnfw::util::matrix::Shape &shape, const void *buffer, + const nnfw::misc::matrix::Shape &shape, const void *buffer, size_t length) { switch (type) @@ -100,7 +100,7 @@ static void asMatrixSource(ANeuralNetworksExecution *execution, int32_t type, in } static void asTensor3DSource(ANeuralNetworksExecution *execution, int32_t type, int32_t index, - const nnfw::util::tensor::Shape &shape, const void *buffer, + const nnfw::misc::tensor::Shape &shape, const void *buffer, size_t length) { switch (type) @@ -130,7 +130,7 @@ static void 
asTensor3DSource(ANeuralNetworksExecution *execution, int32_t type, } static void asTensorSource(ANeuralNetworksExecution *execution, int32_t type, int32_t index, - const nnfw::util::tensor::Shape &shape, const void *buffer, + const nnfw::misc::tensor::Shape &shape, const void *buffer, size_t length) { switch (type) @@ -160,7 +160,7 @@ static void asTensorSource(ANeuralNetworksExecution *execution, int32_t type, in } static void asFeatureSource(ANeuralNetworksExecution *execution, int32_t type, int32_t index, - const nnfw::util::feature::Shape &shape, const void *buffer, + const nnfw::misc::feature::Shape &shape, const void *buffer, size_t length) { switch (type) @@ -244,7 +244,7 @@ static void asMatrixSink(ANeuralNetworksExecution *execution, int32_t type, int3 } static void asFeatureSink(ANeuralNetworksExecution *execution, int32_t type, int32_t index, - const nnfw::util::feature::Shape &shape, void *buffer, size_t length) + const nnfw::misc::feature::Shape &shape, void *buffer, size_t length) { switch (type) { @@ -272,7 +272,7 @@ static void asFeatureSink(ANeuralNetworksExecution *execution, int32_t type, int } static void asTensor3DSink(ANeuralNetworksExecution *execution, int32_t type, int32_t index, - const nnfw::util::tensor::Shape &shape, void *buffer, size_t length) + const nnfw::misc::tensor::Shape &shape, void *buffer, size_t length) { assert(shape.rank() == 3); @@ -302,7 +302,7 @@ static void asTensor3DSink(ANeuralNetworksExecution *execution, int32_t type, in } static void asTensorSink(ANeuralNetworksExecution *execution, int32_t type, int32_t index, - const nnfw::util::tensor::Shape &shape, void *buffer, size_t length) + const nnfw::misc::tensor::Shape &shape, void *buffer, size_t length) { switch (type) { @@ -420,9 +420,9 @@ int ANeuralNetworksExecution_setInput(ANeuralNetworksExecution *execution, int32 // squeeze(shape) eliminates all the dimensions whose dimensionality is 1 // For example, squeeze([3, 1, 3]) returns [3, 3] -static 
nnfw::util::tensor::Shape squeeze(const nnfw::util::tensor::Shape &shape) +static nnfw::misc::tensor::Shape squeeze(const nnfw::misc::tensor::Shape &shape) { - nnfw::util::tensor::Shape res(0); + nnfw::misc::tensor::Shape res(0); for (uint32_t axis = 0; axis < shape.rank(); ++axis) { @@ -497,7 +497,7 @@ int ANeuralNetworksExecution_startCompute(ANeuralNetworksExecution *execution, return ANEURALNETWORKS_UNEXPECTED_NULL; } - const bool sync = profiling::Context::get().sync().enabled(); + const bool sync = profiling::Context::get().sync(); const auto &plan = execution->plan(); const auto &model = plan.model(); diff --git a/runtimes/pure_arm_compute/src/execution.h b/runtimes/pure_arm_compute/src/execution.h index c036fe2c8..f55ab3fbf 100644 --- a/runtimes/pure_arm_compute/src/execution.h +++ b/runtimes/pure_arm_compute/src/execution.h @@ -14,6 +14,13 @@ * limitations under the License. */ +/** + * @file execution.h + * @brief This file contains ANeuralNetworksExecution class for handling Execution NNAPI such as + * ANeuralNetworksExecution_create, ANeuralNetworksExecution_setInput + * @ingroup COM_AI_RUNTIME + */ + #ifndef __EXECUTION_H__ #define __EXECUTION_H__ @@ -21,9 +28,16 @@ #include "internal/Sink.h" #include "internal/Source.h" +/** + * @brief struct to express Execution of NNAPI + */ struct ANeuralNetworksExecution { public: + /** + * @brief Construct with params + * @param [in] plan Pointer to get internal::arm_compute::Plan + */ ANeuralNetworksExecution(const std::shared_ptr<const internal::arm_compute::Plan> &plan) : _plan{plan} { @@ -32,31 +46,69 @@ public: } public: + /** + * @brief Get reference of internal::arm_compute::Plan + * @return Const reference of internal::arm_compute::Plan + */ const internal::arm_compute::Plan &plan(void) const { return *_plan; } private: std::shared_ptr<const internal::arm_compute::Plan> _plan; public: + /** + * @brief Set the nth source with param + * @param [in] n Index of the nth source + * @param [in] source Pointer to 
set the nth source from + * @return N/A + */ // TODO Use InputIndex instead of int void source(int n, std::unique_ptr<Source> &&source) { _sources.at(n) = std::move(source); } + /** + * @brief Set the nth source with param + * @param [in] n Index of the nth source + * @param [in] args Arguments to set the nth source from + * @return N/A + */ template <typename T, typename... Args> void source(int n, Args &&... args) { source(n, std::unique_ptr<T>{new T{std::forward<Args>(args)...}}); } public: + /** + * @brief Get the nth source + * @param [in] n Index of the nth source + * @return Const reference of Source + */ const Source &source(int n) const { return *(_sources.at(n)); } public: + /** + * @brief Set the nth sink with param + * @param [in] n Index of the nth sink + * @param [in] sink Pointer to set the nth sink from + * @return N/A + */ // TODO Use OutputIndex instead of int void sink(int n, std::unique_ptr<Sink> &&sink) { _sinks.at(n) = std::move(sink); } + /** + * @brief Set the nth sink with param + * @param [in] n Index of the nth sink + * @param [in] args Arguments to set the nth sink from + * @return N/A + */ template <typename T, typename... Args> void sink(int n, Args &&... args) { sink(n, std::unique_ptr<T>{new T{std::forward<Args>(args)...}}); } public: + /** + * @brief Get the nth sink + * @param [in] n Index of the nth sink + * @return Const reference of Sink + */ const Sink &sink(int n) const { return *(_sinks.at(n)); } private: diff --git a/runtimes/pure_arm_compute/src/internal/FeatureSink.h b/runtimes/pure_arm_compute/src/internal/FeatureSink.h index 9e4412c2a..7c6884141 100644 --- a/runtimes/pure_arm_compute/src/internal/FeatureSink.h +++ b/runtimes/pure_arm_compute/src/internal/FeatureSink.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file       FeatureSink.h + * @brief      This file contains FeatureSink class + * @ingroup    COM_AI_RUNTIME + */ + #ifndef __INTERNAL_FEATURE_SINK_H__ #define __INTERNAL_FEATURE_SINK_H__ @@ -21,22 +27,36 @@ #include "internal/nnapi/feature/View.h" #include "internal/arm_compute/feature/View.h" -#include <util/feature/Shape.h> -#include "util/feature/IndexIterator.h" +#include <misc/feature/Shape.h> +#include "misc/feature/IndexIterator.h" -// -// FeatureSink -// +/** + * @brief Class to store Feature(4D) output data. + * This is for pulling data to internal tensor from other tensor. + * @tparam T Type of the data elements + */ template <typename T> class FeatureSink final : public Sink { public: - FeatureSink(const nnfw::util::feature::Shape &shape, T *base, const size_t size) + /** + * @brief Construct a FeatureSink object + * + * @param[in] shape 4D tensor dimensions for this feature + * @param[in] base Base pointer of the actual data + * @param[in] size Size of the data + */ + FeatureSink(const nnfw::misc::feature::Shape &shape, T *base, const size_t size) : _shape{shape}, _base{base}, _size{size} { // DO NOTHING } public: + /** + * @brief Pull the data into the internal structure + * @param[in] tensor The tensor which contains source data + * @return N/A + */ void pull(::arm_compute::ITensor &tensor) const override { const ::internal::arm_compute::feature::View<T> from{&tensor}; @@ -44,7 +64,7 @@ public: // Inevitably casting must be done. 
::internal::nnapi::feature::View<T> into{_shape, _base, _size}; - ::nnfw::util::feature::iterate(_shape) + ::nnfw::misc::feature::iterate(_shape) << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { const auto value = from.at(batch, ch, row, col); into.at(batch, ch, row, col) = value; @@ -52,7 +72,7 @@ public: } private: - const nnfw::util::feature::Shape _shape; + const nnfw::misc::feature::Shape _shape; T *const _base; const size_t _size; }; diff --git a/runtimes/pure_arm_compute/src/internal/FeatureSource.h b/runtimes/pure_arm_compute/src/internal/FeatureSource.h index fca56e341..772beb701 100644 --- a/runtimes/pure_arm_compute/src/internal/FeatureSource.h +++ b/runtimes/pure_arm_compute/src/internal/FeatureSource.h @@ -14,31 +14,54 @@ * limitations under the License. */ +/** + * @file       FeatureSource.h + * @brief      This file contains FeatureSource class + * @ingroup    COM_AI_RUNTIME + */ + #ifndef __INTERNAL_FEATURE_SOURCE_H__ #define __INTERNAL_FEATURE_SOURCE_H__ -#include <util/feature/Shape.h> -#include <util/feature/IndexIterator.h> +#include <misc/feature/Shape.h> +#include <misc/feature/IndexIterator.h> #include "internal/nnapi/feature/Reader.h" #include "internal/arm_compute/feature/View.h" +/** + * @brief Class to store feature(4D) input data. + * This is for push out the data to another tensor. 
+ * @tparam T Type of the data elements + */ template <typename T> class FeatureSource final : public Source { public: - FeatureSource(const nnfw::util::feature::Shape &shape, const T *base, const size_t size) + /** + * @brief Construct a FeatureSource object + * + * @param[in] shape 4D tensor dimensions for this feature + * @param[in] base Base pointer of the actual data + * @param[in] size Size of the data + */ + FeatureSource(const nnfw::misc::feature::Shape &shape, const T *base, const size_t size) : _shape{shape}, _base{base}, _size{size} { // DO NOTHING } public: + /** + * @brief Push the data out to the another tensor + * @param[out] The tensor that output data will be stored + * @return N/A + */ void push(::arm_compute::ITensor &tensor) const override { const ::internal::nnapi::feature::Reader<T> from{_shape, _base, _size}; ::internal::arm_compute::feature::View<T> into{&tensor}; - ::nnfw::util::feature::iterate(_shape) + ::nnfw::misc::feature::iterate(_shape) << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { const auto value = from.at(batch, ch, row, col); into.at(batch, ch, row, col) = value; @@ -46,7 +69,7 @@ public: } private: - const nnfw::util::feature::Shape _shape; + const nnfw::misc::feature::Shape _shape; const T *const _base; const size_t _size; }; diff --git a/runtimes/pure_arm_compute/src/internal/IExecutionBuilder.h b/runtimes/pure_arm_compute/src/internal/IExecutionBuilder.h index aa1e67177..2a6e2a743 100644 --- a/runtimes/pure_arm_compute/src/internal/IExecutionBuilder.h +++ b/runtimes/pure_arm_compute/src/internal/IExecutionBuilder.h @@ -14,6 +14,11 @@ * limitations under the License. 
*/ +/** + * @file IExecutionBuilder.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines interface of ExecutionBuilder + */ #ifndef __INTERNAL_IEXECUTION_BUILDER_H__ #define __INTERNAL_IEXECUTION_BUILDER_H__ @@ -22,10 +27,22 @@ #include <memory> #include <string> +/** + * @brief Struct to define interface of ExecutionBuilder + */ struct IExecutionBuilder { + /** + * @brief Destroy the IExecutionBuilder object + */ virtual ~IExecutionBuilder() = default; + /** + * @brief Append function to execute + * @param[in] name Name of function + * @param[in] f Function to append + * @return N/A + */ virtual void append(const std::string &name, std::unique_ptr<::arm_compute::IFunction> &&f) = 0; }; diff --git a/runtimes/pure_arm_compute/src/internal/MatrixSink.h b/runtimes/pure_arm_compute/src/internal/MatrixSink.h index 32bd49dc6..23ecc112b 100644 --- a/runtimes/pure_arm_compute/src/internal/MatrixSink.h +++ b/runtimes/pure_arm_compute/src/internal/MatrixSink.h @@ -14,6 +14,11 @@ * limitations under the License. 
*/ +/** + * @file MatrixSink.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines MatrixSink class + */ #ifndef __INTERNAL_MATRIX_SINK_H__ #define __INTERNAL_MATRIX_SINK_H__ @@ -27,9 +32,19 @@ #include <cstring> #include <cassert> +/** + * @brief Class to get matrix data from arm compute tensor + */ template <typename T> class MatrixSink final : public Sink { public: + /** + * @brief Construct a new Matrix Sink object + * @param[in] H Height of matrix + * @param[in] W Width of matrix + * @param[in] base Pointer to get data + * @param[in] size Size of matrix + */ MatrixSink(const int32_t H, const int32_t W, T *base, const size_t size) : _height{H}, _width{W}, _base{base} { @@ -37,6 +52,11 @@ public: } public: + /** + * @brief Get matrix data from arm compute tensor to base + * @param[in] tensor Tensor object of arm compute to get data + * @return N/A + */ void pull(::arm_compute::ITensor &tensor) const override { assert(tensor.info()->dimension(0) == _width); diff --git a/runtimes/pure_arm_compute/src/internal/MatrixSource.h b/runtimes/pure_arm_compute/src/internal/MatrixSource.h index 2f5d92484..71d6a804f 100644 --- a/runtimes/pure_arm_compute/src/internal/MatrixSource.h +++ b/runtimes/pure_arm_compute/src/internal/MatrixSource.h @@ -14,6 +14,11 @@ * limitations under the License. 
*/ +/** + * @file MatrixSource.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines MatrixSource class + */ #ifndef __INTERNAL_MATRIX_SOURCE_H__ #define __INTERNAL_MATRIX_SOURCE_H__ @@ -23,16 +28,30 @@ #include "internal/Source.h" +/** + * @brief Class to push matrix data to arm compute tensor + */ template <typename T> class MatrixSource final : public Source { public: - MatrixSource(const nnfw::util::matrix::Shape &shape, const T *base, const size_t size) + /** + * @brief Construct a new MatrixSource object + * @param[in] shape Shape of matrix + * @param[in] base Pointer of matrix data to push + * @param[in] size Size of matrix + */ + MatrixSource(const nnfw::misc::matrix::Shape &shape, const T *base, const size_t size) : _shape{shape}, _base{base}, _size{size} { // do nothing } public: + /** + * @brief Push matrix data to arm compute tensor + * @param[out] tensor Tensor object of arm compute to push matrix data + * @return N/A + */ void push(::arm_compute::ITensor &tensor) const override { using ::arm_compute::Window; @@ -55,7 +74,7 @@ public: } private: - const nnfw::util::matrix::Shape _shape; + const nnfw::misc::matrix::Shape _shape; const T *const _base; const size_t _size; }; diff --git a/runtimes/pure_arm_compute/src/internal/Model.cc b/runtimes/pure_arm_compute/src/internal/Model.cc index 3a31f9911..03753fea2 100644 --- a/runtimes/pure_arm_compute/src/internal/Model.cc +++ b/runtimes/pure_arm_compute/src/internal/Model.cc @@ -16,8 +16,6 @@ #include "internal/Model.h" -#include <cassert> - namespace internal { namespace tflite @@ -25,7 +23,7 @@ namespace tflite namespace operand { -Shape::Shape(uint32_t rank) : nnfw::util::tensor::Shape(rank) +Shape::Shape(uint32_t rank) : nnfw::misc::tensor::Shape(rank) { // DO NOTHING } @@ -37,17 +35,17 @@ int32_t Shape::asVector(void) const return dim(0); } -nnfw::util::matrix::Shape Shape::asMatrix(void) const +nnfw::misc::matrix::Shape Shape::asMatrix(void) const { assert(rank() == 2); const auto height = 
dim(0); const auto width = dim(1); - return nnfw::util::matrix::Shape(height, width); + return nnfw::misc::matrix::Shape(height, width); } -nnfw::util::feature::Shape Shape::asFeature(void) const +nnfw::misc::feature::Shape Shape::asFeature(void) const { assert(rank() == 4); @@ -62,15 +60,15 @@ nnfw::util::feature::Shape Shape::asFeature(void) const const auto height = dim(1); const auto width = dim(2); - return nnfw::util::feature::Shape(batch, depth, height, width); + return nnfw::misc::feature::Shape(batch, depth, height, width); } -nnfw::util::tensor::Shape Shape::asTensor(void) const +nnfw::misc::tensor::Shape Shape::asTensor(void) const { - return nnfw::util::tensor::Shape(*this); // this shape represents shape of NNAPI + return nnfw::misc::tensor::Shape(*this); // this shape represents shape of NNAPI } -nnfw::util::kernel::Shape Shape::asKernel(void) const +nnfw::misc::kernel::Shape Shape::asKernel(void) const { assert(rank() == 4); @@ -84,7 +82,7 @@ nnfw::util::kernel::Shape Shape::asKernel(void) const const auto height = dim(1); const auto width = dim(2); - return nnfw::util::kernel::Shape(count, depth, height, width); + return nnfw::misc::kernel::Shape(count, depth, height, width); } // Extended dimension is filled with 1. @@ -120,6 +118,11 @@ const Object &Set::at(const Index &index) const { return *(_objects.at(index.asI Object &Set::at(const Index &index) { return *(_objects.at(index.asInt())); } +bool Set::exist(const Index &index) const +{ + return index.asInt() >= 0 && index.asInt() < _objects.size(); +} + } // namespace operand } // namespace tflite } // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/Model.h b/runtimes/pure_arm_compute/src/internal/Model.h index 33ba3a8fd..bdcf32f6f 100644 --- a/runtimes/pure_arm_compute/src/internal/Model.h +++ b/runtimes/pure_arm_compute/src/internal/Model.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file Model.h + * @brief This file contains classes for handle internal Model object + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_MODEL_H__ #define __INTERNAL_MODEL_H__ @@ -24,15 +30,26 @@ namespace tflite namespace operand { +/** + * @brief Class to express index of operand. + */ class Index { public: + /** + * @brief Construct a new Index object for operand with param. + * @param [in] value The number of index + */ explicit Index(int value) : _value{value} { // DO NOTHING } public: + /** + * @brief Get index value as int + * @return Index value as int + */ int asInt(void) const { return _value; } private: @@ -46,10 +63,10 @@ private: #include <vector> #include <cstdint> -#include "util/feature/Shape.h" -#include "util/matrix/Shape.h" -#include "util/kernel/Shape.h" -#include "util/tensor/Shape.h" +#include "misc/feature/Shape.h" +#include "misc/matrix/Shape.h" +#include "misc/kernel/Shape.h" +#include "misc/tensor/Shape.h" namespace internal { @@ -58,19 +75,51 @@ namespace tflite namespace operand { -struct Shape : public nnfw::util::tensor::Shape +/** + * @brief Class to express shape of operand. + */ +struct Shape : public nnfw::misc::tensor::Shape { public: + /** + * @brief Construct a new Shape object for operand with param. 
+ * @param [in] rank The rank value of shape + */ Shape(uint32_t rank); public: + /** + * @brief Get dimension value of tensor as vector + * @return Dimension value(int32_t) of tensor as vector + */ int32_t asVector(void) const; - nnfw::util::feature::Shape asFeature(void) const; - nnfw::util::matrix::Shape asMatrix(void) const; - nnfw::util::kernel::Shape asKernel(void) const; - nnfw::util::tensor::Shape asTensor(void) const; + /** + * @brief Get dimension values of tensor as feature::Shape + * @return Dimension values of tensor as feature::Shape + */ + nnfw::misc::feature::Shape asFeature(void) const; + /** + * @brief Get dimension values of tensor as matrix::Shape + * @return Dimension values of tensor as matrix::Shape + */ + nnfw::misc::matrix::Shape asMatrix(void) const; + /** + * @brief Get dimension values of tensor as kernel::Shape + * @return Dimension values of tensor as kernel::Shape + */ + nnfw::misc::kernel::Shape asKernel(void) const; + /** + * @brief Get dimension values of tensor::Shape + * @return Dimension values of tensor::Shape + */ + nnfw::misc::tensor::Shape asTensor(void) const; public: + /** + * @brief Extend rank of Shape object for operand with param. + * @param [in] to_rank The rank value to be extended to + * @return N/A + */ void extendRank(size_t); }; @@ -87,27 +136,60 @@ namespace tflite namespace operand { +/** + * @brief Class to have data of operand. + */ struct Data { + /** + * @brief Destruct this object + */ virtual ~Data() = default; + /** + * @brief Get size of data + * @return size of data + */ virtual size_t size(void) const = 0; + /** + * @brief Get the base address of data + * @return the base address of data + */ virtual const uint8_t *base(void) const = 0; }; +/** + * @brief Class to have cached data of operand. + */ class CachedData final : public Data { public: + /** + * @brief Construct a new CachedData object for operand with param. 
+ * @param [in] base the base address of data + * @param [in] size the size of data + */ CachedData(const uint8_t *base, size_t size) : _base{new uint8_t[size]}, _size{size} { std::copy(base, base + size, _base); } public: + /** + * @brief Destruct this object + */ ~CachedData() { delete[] _base; } public: + /** + * @brief Get size of data + * @return size of data + */ size_t size(void) const override { return _size; } + /** + * @brief Get the base address of data + * @return the base address of data + */ const uint8_t *base(void) const override { return _base; } private: @@ -115,16 +197,32 @@ private: size_t _size; }; +/** + * @brief Class to have external data of operand. + */ class ExternalData final : public Data { public: + /** + * @brief Construct a new ExternalData object for operand with param. + * @param [in] base the base address of data + * @param [in] size the size of data + */ ExternalData(const uint8_t *base, size_t size) : _base{base}, _size{size} { // DO NOTHING } public: + /** + * @brief Get size of data + * @return size of data + */ size_t size(void) const override { return _size; } + /** + * @brief Get the base address of data + * @return the base address of data + */ const uint8_t *base(void) const override { return _base; } private: @@ -148,9 +246,19 @@ namespace tflite namespace operand { +/** + * @brief Class to express operand as object. + */ class Object { public: + /** + * @brief Construct a new Object object for operand with param. 
+ * @param [in] shape shape of operand + * @param [in] type type of operand + * @param [in] scale scale of operand + * @param [in] zeroPoint zeroPoint of operand + */ explicit Object(const Shape &shape, const int32_t type, const float scale, const int32_t zeroPoint) : _shape{shape}, _type{type}, _scale{scale}, _zeroPoint{zeroPoint} @@ -159,25 +267,58 @@ public: } public: + /** + * @brief Get shape of operand + * @return Reference of shape of operand + */ const Shape &shape(void) const { return _shape; } + /** + * @brief Get type of operand + * @return type of operand + */ const int32_t type(void) const { return _type; } + /** + * @brief Get scale of operand + * @return scale of operand + */ const float scale(void) const { return _scale; } + /** + * @brief Get zeroPoint of operand + * @return zeroPoint of operand + */ const int32_t zeroPoint(void) const { return _zeroPoint; } private: void data(std::unique_ptr<Data> &&data) { _data = std::move(data); } public: + /** + * @brief Get data of operand + * @return Reference of data of operand + */ const Data &data(void) const { return *_data; } + /** + * @brief Get true if Object has data, otherwise @c false + * @return @c true if Object has data, otherwise @c false + */ bool hasData(void) const { return _data != nullptr; } public: + /** + * @brief Set data of operand with param + * @param [in] args arguments of data to be set + * @return N/A + */ template <typename T, typename... Args> void data(Args &&... 
args) { data(std::unique_ptr<T>(new T{std::forward<Args>(args)...})); } public: + /** + * @brief Get value of data as scalar + * @return value of data as scalar + */ template <typename T> T asScalar(void) const { assert((_shape.rank() == 0) || ((_shape.rank() == 1) && (_shape.dim(0) == 1))); @@ -188,6 +329,11 @@ public: } public: + /** + * @brief Get value of data as ReorderBits + * @param [in] numOfBits The number of bits to be reordered to + * @return value of data as ReorderBits + */ template <typename T> T asReorderBits(size_t numOfBits) const { assert((_shape.rank() == 0) || ((_shape.rank() == 1) && (_shape.dim(0) == 1))); @@ -209,8 +355,6 @@ private: } // namespace tflite } // namespace internal -#include <memory> - namespace internal { namespace tflite @@ -218,9 +362,17 @@ namespace tflite namespace operand { +/** + * @brief Class to have object instances in a kind of set + */ class Set { public: + /** + * @brief Iterate objects with fn + * @param [in] fn function to be iterated + * @return N/A + */ void iterate(const std::function<void(const Index &)> &fn) { for (uint32_t n = 0; n < _objects.size(); ++n) @@ -231,12 +383,35 @@ public: + /** + * @brief Append Object for operand with param + * @param [in] shape shape of operand + * @param [in] type type of operand + * @param [in] scale scale of operand + * @param [in] zeroPoint zeroPoint of operand + * @return Value of Index which has been appended to + */ Index append(const Shape &, int32_t type, float scale, int32_t zeroPoint); public: + /** + * @brief Get Object at Index + * @param [in] index Index to be at + * @return Const reference of Object + */ const Object &at(const Index &) const; + /** + * @brief Get Object at Index + * @param [in] index Index to be at + * @return Reference of Object + */ Object &at(const Index &); + /** + * @brief Get size of operands in Set + * @return Value of size + */ size_t size(void) const { return _objects.size(); } + bool exist(const Index &) const; private: 
std::vector<std::unique_ptr<Object>> _objects; @@ -255,16 +430,36 @@ namespace tflite namespace op { +/** + * @brief Class to have sequence operators. + */ class Sequence { public: + /** + * @brief Construct a new Sequence object for operator as default + */ Sequence() = default; public: + /** + * @brief Get size of operators in Sequence + * @return Value of size + */ uint32_t size(void) const { return _ops.size(); } public: + /** + * @brief Get op::Node at Index + * @param [in] nth index to be at + * @return Reference of op::Node + */ op::Node &at(uint32_t nth) { return *(_ops.at(nth)); } + /** + * @brief Get op::Node at Index + * @param [in] nth index to be at + * @return Const reference of op::Node + */ const op::Node &at(uint32_t nth) const { return *(_ops.at(nth)); } private: @@ -275,6 +470,11 @@ private: } public: + /** + * @brief Add op::Node with param + * @param [in] args arguments of op::Node to be set + * @return Reference of Sequence + */ template <typename T, typename... Args> Sequence &emplace_back(Args &&... 
args) { return emplace_back(std::unique_ptr<T>(new T{std::forward<Args>(args)...})); @@ -293,14 +493,33 @@ namespace internal namespace tflite { +/** + * @brief Class to have operand::Set as operands and op::Sequence as operators + */ class Model { public: + /** + * @brief Get operand::Set + * @return Reference of operand::Set + */ operand::Set &operands(void) { return _operands; } + /** + * @brief Get operand::Set + * @return Const reference of operand::Set + */ const operand::Set &operands(void) const { return _operands; } public: + /** + * @brief Get op::Sequence + * @return Reference of op::Sequence + */ op::Sequence &operations(void) { return _operations; } + /** + * @brief Get op::Sequence + * @return Const reference of op::Sequence + */ const op::Sequence &operations(void) const { return _operations; } private: @@ -309,8 +528,8 @@ private: public: // TODO Hide these fields - std::vector<operand::Index> inputs; - std::vector<operand::Index> outputs; + std::vector<operand::Index> inputs; /**< indexes of operand as input */ + std::vector<operand::Index> outputs; /**< indexes of operand as output */ }; } // namespace tflite diff --git a/runtimes/pure_arm_compute/src/internal/Sink.h b/runtimes/pure_arm_compute/src/internal/Sink.h index af3b37e61..6f44561ea 100644 --- a/runtimes/pure_arm_compute/src/internal/Sink.h +++ b/runtimes/pure_arm_compute/src/internal/Sink.h @@ -14,15 +14,31 @@ * limitations under the License. 
*/ +/** + * @file Sink.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines Sink struct + */ #ifndef __INTERNAL_SINK_H__ #define __INTERNAL_SINK_H__ #include <arm_compute/core/ITensor.h> +/** + * @brief Struct to get tensor data from arm compute tensor (abstract) + */ struct Sink { + /** + * @brief Destroy the Sink object + */ virtual ~Sink() = default; + /** + * @brief Get tensor data from arm compute tensor + * @param[in] tensor Tensor object of arm compute to get data + * @return N/A + */ virtual void pull(::arm_compute::ITensor &tensor) const = 0; }; diff --git a/runtimes/pure_arm_compute/src/internal/Sinks.h b/runtimes/pure_arm_compute/src/internal/Sinks.h index e8a7d5966..7317c67c1 100644 --- a/runtimes/pure_arm_compute/src/internal/Sinks.h +++ b/runtimes/pure_arm_compute/src/internal/Sinks.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file       Sinks.h + * @brief      This file contains TensorSink class + * @ingroup    COM_AI_RUNTIME + */ + #ifndef __INTERNAL_SINKS_H__ #define __INTERNAL_SINKS_H__ @@ -28,29 +34,46 @@ #include "internal/nnapi/tensor/View.h" #include "internal/arm_compute/tensor/View.h" -#include "util/tensor/IndexIterator.h" +#include "misc/tensor/IndexIterator.h" +/** + * @brief Class to store NN model output data for general-shaped tensors. + * This is for pulling data to internal tensor from other tensor. 
+ * @tparam T Type of the data elements + */ template <typename T> class TensorSink final : public Sink { public: - TensorSink(const nnfw::util::tensor::Shape &shape, T *base, const size_t size) + /** + * @brief Construct a TensorSink object + * + * @param[in] shape general-shaped tensor dimensions + * @param[in] base Base pointer of the actual data + * @param[in] size Size of the data + */ + TensorSink(const nnfw::misc::tensor::Shape &shape, T *base, const size_t size) : _shape{shape}, _base{base}, _size{size} { // DO NOTHING } public: + /** + * @brief Pull the data into the internal structure + * @param[in] tensor The tensor which contains source data + * @return N/A + */ void pull(::arm_compute::ITensor &tensor) const override { const ::internal::arm_compute::tensor::View<T> from{&tensor}; ::internal::nnapi::tensor::View<T> into{_shape, _base, _size}; - using ::nnfw::util::tensor::iterate; - using ::nnfw::util::tensor::Index; + using ::nnfw::misc::tensor::iterate; + using ::nnfw::misc::tensor::Index; const uint32_t rank = _shape.rank(); - ::nnfw::util::tensor::iterate(_shape) << [&](const Index &raw) { + ::nnfw::misc::tensor::iterate(_shape) << [&](const Index &raw) { Index permuted(raw.rank()); for (uint32_t axis = 0; axis < rank; ++axis) @@ -64,7 +87,7 @@ public: } private: - const nnfw::util::tensor::Shape _shape; + const nnfw::misc::tensor::Shape _shape; private: T *const _base; diff --git a/runtimes/pure_arm_compute/src/internal/Source.h b/runtimes/pure_arm_compute/src/internal/Source.h index a159e5092..fa8f1e811 100644 --- a/runtimes/pure_arm_compute/src/internal/Source.h +++ b/runtimes/pure_arm_compute/src/internal/Source.h @@ -14,15 +14,32 @@ * limitations under the License. */ +/** + * @file Source.h + * @brief This file contains Source struct for pushing ITensor + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_SOURCE_H__ #define __INTERNAL_SOURCE_H__ #include <arm_compute/core/ITensor.h> +/** + * @brief Struct to push inner source to ITensor. 
+ */ struct Source { + /** + * @brief Destructor as default + */ virtual ~Source() = default; + /** + * @brief Push inner source to ITensor + * @param [in] tensor ITensor to be pushed into + * @return N/A + */ virtual void push(::arm_compute::ITensor &tensor) const = 0; }; diff --git a/runtimes/pure_arm_compute/src/internal/Swizzle.h b/runtimes/pure_arm_compute/src/internal/Swizzle.h index 66bf7aef6..f127b8a3b 100644 --- a/runtimes/pure_arm_compute/src/internal/Swizzle.h +++ b/runtimes/pure_arm_compute/src/internal/Swizzle.h @@ -14,28 +14,53 @@ * limitations under the License. */ +/** + * @file Swizzle.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines ARMComputeAxis class and utility functions to support mapping + * between arm compute axis and NNAPI axis + */ #ifndef __SWIZZLE_H__ #define __SWIZZLE_H__ +/** + * @brief Class to represent arm compute axis + */ class ARMComputeAxis { public: + /** + * @brief Construct a new ARMComputeAxis object + */ ARMComputeAxis() = default; public: + /** + * @brief Construct a new ARMComputeAxis object + * @param[in] value Raw axis number + */ explicit ARMComputeAxis(uint32_t value) : _value{value} { // DO NOTHING } public: + /** + * @brief Get raw axis number + * @return Raw axis number + */ uint32_t value(void) const { return _value; } private: uint32_t _value; }; -// Convert T/F Lite / NNAPI axis (based on ...NHWC) to ARMCompute axis (WHCN...) +/** + * @brief Convert T/F Lite / NNAPI axis (based on ...NHWC) to arm compute axis (WHCN...) 
+ * @param[in] rank Rank of shape + * @param[in] axis Axis to map + * @return ARMComputeAxis including arm compute axis info + */ inline ARMComputeAxis ToARMComputeAxis(uint32_t rank, uint32_t axis) { assert(rank > axis); @@ -68,6 +93,12 @@ inline ARMComputeAxis ToARMComputeAxis(uint32_t rank, uint32_t axis) #include <cassert> +/** + * @brief Convert bitmask info from NNAPI axis to arm compute axis + * @param[in] in Bitmask data + * @param[in] numOfBits Used bits (rank) + * @return Converted bitmask + */ template <typename T> inline T ReorderBits(T in, size_t numOfBits) { assert(numOfBits > 0); diff --git a/runtimes/pure_arm_compute/src/internal/Tensor3DSink.h b/runtimes/pure_arm_compute/src/internal/Tensor3DSink.h index 20de3b9e8..1e14e2d6c 100644 --- a/runtimes/pure_arm_compute/src/internal/Tensor3DSink.h +++ b/runtimes/pure_arm_compute/src/internal/Tensor3DSink.h @@ -14,6 +14,11 @@ * limitations under the License. */ +/** + * @file Tensor3DSink.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines Tensor3DSink class + */ #ifndef __TENSOR3D_SINK_H__ #define __TENSOR3D_SINK_H__ @@ -26,16 +31,30 @@ #include <arm_compute/core/Window.h> #include <arm_compute/core/Helpers.h> +/** + * @brief Class to get tensor data from arm compute tensor + */ template <typename T> class Tensor3DSink final : public Sink { public: - Tensor3DSink(const nnfw::util::tensor::Shape &shape, T *base, const size_t size) + /** + * @brief Construct a new Tensor3DSink object + * @param[in] shape Shape of tensor + * @param[in] base Pointer to get data + * @param[in] size Size of tensor + */ + Tensor3DSink(const nnfw::misc::tensor::Shape &shape, T *base, const size_t size) : _shape{shape}, _base{base}, _size{size} { // DO NOTHING } public: + /** + * @brief Get tensor data from arm compute tensor to base + * @param[in] tensor Tensor object of arm compute to get data + * @return N/A + */ void pull(::arm_compute::ITensor &tensor) const override { using ::arm_compute::Window; @@ -60,7 +79,7 @@ 
public: } private: - const nnfw::util::tensor::Shape _shape; + const nnfw::misc::tensor::Shape _shape; private: T *const _base; diff --git a/runtimes/pure_arm_compute/src/internal/Tensor3DSource.h b/runtimes/pure_arm_compute/src/internal/Tensor3DSource.h index c100bbdd2..3d8d1b958 100644 --- a/runtimes/pure_arm_compute/src/internal/Tensor3DSource.h +++ b/runtimes/pure_arm_compute/src/internal/Tensor3DSource.h @@ -14,6 +14,11 @@ * limitations under the License. */ +/** + * @file Tensor3DSource.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines Tensor3DSource class + */ #ifndef __TENSOR3D_SOURCE_H__ #define __TENSOR3D_SOURCE_H__ @@ -26,16 +31,30 @@ #include <arm_compute/core/Window.h> #include <arm_compute/core/Helpers.h> +/** + * @brief Class to push tensor data to arm compute tensor + */ template <typename T> class Tensor3DSource final : public Source { public: - Tensor3DSource(const nnfw::util::tensor::Shape &shape, const T *base, const size_t size) + /** + * @brief Construct a new Tensor3DSource object + * @param[in] shape Shape of tensor + * @param[in] base Pointer of tensor data to push + * @param[in] size Size of tensor + */ + Tensor3DSource(const nnfw::misc::tensor::Shape &shape, const T *base, const size_t size) : _shape{shape}, _base{base}, _size{size} { // DO NOTHING } public: + /** + * @brief Push tensor data to arm compute tensor + * @param[out] tensor Tensor object of arm compute to push tensor data + * @return N/A + */ void push(::arm_compute::ITensor &tensor) const override { using ::arm_compute::Window; @@ -60,7 +79,7 @@ public: } private: - const nnfw::util::tensor::Shape _shape; + const nnfw::misc::tensor::Shape _shape; private: const T *const _base; diff --git a/runtimes/pure_arm_compute/src/internal/TensorSource.h b/runtimes/pure_arm_compute/src/internal/TensorSource.h index 0ddc44855..114d3588e 100644 --- a/runtimes/pure_arm_compute/src/internal/TensorSource.h +++ b/runtimes/pure_arm_compute/src/internal/TensorSource.h @@ -14,11 +14,17 
@@ * limitations under the License. */ +/** + * @file TensorSource.h + * @brief This file contains TensorSource class which is inherited from Source class + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_TENSOR_SOURCE_H__ #define __INTERNAL_TENSOR_SOURCE_H__ -#include <util/tensor/Shape.h> -#include <util/tensor/IndexIterator.h> +#include <misc/tensor/Shape.h> +#include <misc/tensor/IndexIterator.h> #include "internal/Source.h" #include "internal/Swizzle.h" @@ -26,24 +32,38 @@ #include "internal/arm_compute/tensor/View.h" // NOTE TensorSource is much slower than specialized Source(s) +/** + * @brief Class to define constructor and push function + */ template <typename T> class TensorSource final : public Source { public: - TensorSource(const nnfw::util::tensor::Shape &shape, const T *base, const size_t size) + /** + * @brief Construct a new TensorSource object with params + * @param [in] shape Shape of tensor + * @param [in] base Base address + * @param [in] size Size of tensor + */ + TensorSource(const nnfw::misc::tensor::Shape &shape, const T *base, const size_t size) : _shape{shape}, _base{base}, _size{size} { // DO NOTHING } public: + /** + * @brief Function for pushing tensor + * @param [in] tensor Tensor to be pushed + * @return N/A + */ void push(::arm_compute::ITensor &tensor) const override { const ::internal::nnapi::tensor::Reader<T> from{_shape, _base, _size}; ::internal::arm_compute::tensor::View<T> into{&tensor}; - ::nnfw::util::tensor::iterate(_shape) << [&](const nnfw::util::tensor::Index &index_nnapi) { + ::nnfw::misc::tensor::iterate(_shape) << [&](const nnfw::misc::tensor::Index &index_nnapi) { const auto rank = index_nnapi.rank(); - nnfw::util::tensor::Index index_ACL(rank); + nnfw::misc::tensor::Index index_ACL(rank); for (uint32_t axis = 0; axis < rank; ++axis) { @@ -55,7 +75,7 @@ public: } private: - const nnfw::util::tensor::Shape _shape; + const nnfw::misc::tensor::Shape _shape; const T *const _base; const size_t _size; }; diff --git 
a/runtimes/pure_arm_compute/src/internal/VectorSink.h b/runtimes/pure_arm_compute/src/internal/VectorSink.h index d1bf962e2..a630ef1c1 100644 --- a/runtimes/pure_arm_compute/src/internal/VectorSink.h +++ b/runtimes/pure_arm_compute/src/internal/VectorSink.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file       VectorSink.h + * @brief      This file contains VectorSink class + * @ingroup    COM_AI_RUNTIME + */ + #ifndef __INTERNAL_VECTOR_SINK_H__ #define __INTERNAL_VECTOR_SINK_H__ @@ -23,18 +29,31 @@ #include <cassert> -// -// VectorSink -// +/** + * @brief Class to store vector(2D) output data. + * This is for pulling out the data to another tensor. + * @tparam T Type of the data elements + */ template <typename T> class VectorSink final : public Sink { public: + /** + * @brief Construct a VectorSink object + * @param[in] vlen Length of the vector + * @param[in] base Base pointer of the actual data + * @param[in] size Size of the data + */ VectorSink(const int32_t vlen, T *base, const size_t size) : _vlen{vlen}, _base{base} { assert(size >= _vlen * sizeof(T)); } public: + /** + * @brief Pull the data into the internal structure + * @param[in] tensor The tensor which contains source data + * @return N/A + */ void pull(::arm_compute::ITensor &tensor) const override { for (int32_t n = 0; n < _vlen; ++n) diff --git a/runtimes/pure_arm_compute/src/internal/VectorSource.h b/runtimes/pure_arm_compute/src/internal/VectorSource.h index 41aab07e4..48d3d3209 100644 --- a/runtimes/pure_arm_compute/src/internal/VectorSource.h +++ b/runtimes/pure_arm_compute/src/internal/VectorSource.h @@ -14,20 +14,42 @@ * limitations under the License. */ +/** + * @file       VectorSource.h + * @brief      This file contains VectorSource class + * @ingroup    COM_AI_RUNTIME + */ + #ifndef __INTERNAL_VECTOR_SOURCE_H__ #define __INTERNAL_VECTOR_SOURCE_H__ #include "internal/Source.h" +/** + * @brief Class to store vector(2D) input data. 
+ * This is for push out the data to another tensor. + * @tparam T Type of the data elements + */ template <typename T> class VectorSource final : public Source { public: + /** + * @brief Construct a VectorSource object + * @param[in] vlen Length of the vector + * @param[in] base Base pointer of the actual data + * @param[in] size Size of the data + */ VectorSource(const int32_t vlen, const T *base, const size_t size) : _vlen{vlen}, _base{base} { assert(size >= _vlen * sizeof(T)); } public: + /** + * @brief Push the data out to the another tensor + * @param[out] The tensor that output data will be stored + * @return N/A + */ void push(::arm_compute::ITensor &tensor) const override { for (int32_t n = 0; n < _vlen; ++n) diff --git a/runtimes/pure_arm_compute/src/internal/arm_compute.h b/runtimes/pure_arm_compute/src/internal/arm_compute.h index ef43ed45c..fb6acaf81 100644 --- a/runtimes/pure_arm_compute/src/internal/arm_compute.h +++ b/runtimes/pure_arm_compute/src/internal/arm_compute.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file       arm_compute.h + * @brief      This file contains arm_compute library related classes + * @ingroup    COM_AI_RUNTIME + */ + #ifndef __INTERNAL_ARM_COMPUTE_H__ #define __INTERNAL_ARM_COMPUTE_H__ @@ -28,6 +34,9 @@ namespace arm_compute namespace operand { +/** + * @brief Class to access the tensor object + */ class Object { public: @@ -40,12 +49,21 @@ public: } public: + /** + * @brief Get the tensor pointer + * @return The tensor pointer + */ ::arm_compute::ITensor *ptr(void) const { return _tensor.get(); } private: std::shared_ptr<::arm_compute::ITensor> _tensor; public: + /** + * @brief Access the tensor object and run the given function + : @param[in] fn The actual behavior when accessing the tensor object + * @return N/A + */ void access(const std::function<void(::arm_compute::ITensor &tensor)> &fn) const; }; @@ -64,24 +82,48 @@ namespace arm_compute namespace operand { +/** + * @brief Class to manage Object instances + */ class Context { public: + /** + * @brief Set index and tensor pair + * @param[in] ind The operand index + * @param[in] tensor The tensor object + * @return This object reference + */ Context &set(const ::internal::tflite::operand::Index &ind, const std::shared_ptr<::arm_compute::ITensor> &tensor); public: + /** + * @brief Check if the tensor for given index is exist + * @param[in] ind The operand Index + * @return @c true if the entry for ind is exist, otherwise @c false + */ bool exist(const ::internal::tflite::operand::Index &ind) const { return _objects.find(ind.asInt()) != _objects.end(); } public: + /** + * @brief Lookup the tensor with the given index + * @param[in] ind The index as the key + * @return The object const reference + */ const Object &at(const ::internal::tflite::operand::Index &ind) const { return _objects.at(ind.asInt()); } + /** + * @brief Lookup the tensor with the given index + * @param[in] ind The index as the key + * @return The object reference + */ Object &at(const 
::internal::tflite::operand::Index &ind) { return _objects.at(ind.asInt()); } private: @@ -101,19 +143,38 @@ namespace arm_compute namespace op { +/** + * @brief Class to wrap IFunction + */ class Step { public: + /** + * @brief Construct a Step object + * @param[in] func The compiled code to be executed + */ Step(std::unique_ptr<::arm_compute::IFunction> &&func) : _func{std::move(func)} { // DO NOTHING } public: + /** + * @brief Run _func + * @return N/A + */ void run(void) const { _func->run(); } public: + /** + * @brief Get member @c _name + * @return The name as const reference + */ const std::string &name(void) const { return _name; } + /** + * @brief Get member @c _name + * @return The name as reference + */ std::string &name(void) { return _name; } private: @@ -121,7 +182,15 @@ private: std::unique_ptr<::arm_compute::IFunction> _func; #ifdef TFLITE_PROFILING_ENABLED public: + /** + * @brief Get member @c _op_index + * @return The operation index as value + */ int op_idx() const { return _op_idx; } + /** + * @brief Get member @c _op_index + * @return The operation index as reference + */ int &op_idx() { return _op_idx; } private: int _op_idx; @@ -139,12 +208,24 @@ namespace arm_compute namespace op { +/** + * @brief Class managing compiled operation code Sequence + */ class Sequence { public: + /** + * @brief Get size of sequence + * @return Number of sequence steps + */ uint32_t size(void) const { return _functions.size(); } public: + /** + * @brief Append a Function to the sequence + * @param[in] func Function to be appended + * @return This object reference + */ Sequence &append(std::unique_ptr<::arm_compute::IFunction> &&func) { _functions.emplace_back(std::move(func)); @@ -152,7 +233,17 @@ public: } public: + /** + * @brief Get the step entry on the index @c n + * @param[in] n The index + * @return The step object as reference + */ Step &at(uint32_t n) { return _functions.at(n); } + /** + * @brief Get the step entry on the index @c n + * @param[in] n The 
index + * @return The step object as const reference + */ const Step &at(uint32_t n) const { return _functions.at(n); } private: @@ -169,23 +260,50 @@ namespace internal namespace arm_compute { +/** + * @brief Class to manage compiled operation sequence + */ class Plan { public: + /** + * @brief Construct a Plan object + * @param[in] model Model that we want to compile + */ Plan(const std::shared_ptr<const ::internal::tflite::Model> &model) : _model(model) { // DO NOTHING } public: + /** + * @brief Get the model object + * @return The model object as const reference + */ const ::internal::tflite::Model &model(void) const { return *_model; } public: + /** + * @brief Get operand context + * @return The operand context as reference + */ operand::Context &operands(void) { return _operands; } + /** + * @brief Get operand context + * @return The operand context as const reference + */ const operand::Context &operands(void) const { return _operands; } public: + /** + * @brief Get operation sequence + * @return The operation sequence as reference + */ op::Sequence &operations(void) { return _ops; } + /** + * @brief Get operation sequence + * @return The operation sequence as const reference + */ const op::Sequence &operations(void) const { return _ops; } private: @@ -204,7 +322,10 @@ namespace internal namespace arm_compute { -// check if this runtime runs on GPU or NEON +/** + * @brief Check if this runtime runs on GPU or NEON + * @return @c true if GPU mode, otherwise @c false + */ bool isGpuMode(); #define CAST_CL(tensor) static_cast<::arm_compute::CLTensor *>(tensor) diff --git a/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.cc b/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.cc new file mode 100644 index 000000000..ff2f79309 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.cc @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/arm_compute/Cast.h" + +#include "internal/Swizzle.h" + +::arm_compute::Coordinates getARMComputeAxises(uint32_t rank) +{ + ::arm_compute::Coordinates res{}; + + res.set_num_dimensions(rank); + + for (uint32_t axis = 0; axis < rank; ++axis) + { + res.set(axis, ToARMComputeAxis(rank, axis).value()); + } + + return res; +} + +::arm_compute::Coordinates asARMComputeCoordinates(const ::arm_compute::Coordinates &runtime_coord, + const ::arm_compute::Coordinates &axises) +{ + ::arm_compute::Coordinates id{}; + assert(runtime_coord.num_dimensions() == axises.num_dimensions()); + for (size_t i = 0; i < runtime_coord.num_dimensions(); ++i) + { + id.set(axises[i], runtime_coord[i]); + } + return id; +} + +// Restructure runtime_permutationVector to ACL_permutationVector +::arm_compute::PermutationVector getARMComputePermutationVector(uint32_t rank, + const int32_t *runtime_pv) +{ + // rank upto 4 is supported + assert(rank <= 4); + assert(runtime_pv != nullptr); + + int new_pv[4] = {0}; + ::arm_compute::Coordinates axises = getARMComputeAxises(rank); + + if (rank == 4) + { + /** + axises = {3,1,0,2} + NNAPI PermutationVector + N 0 3 + H 1 1 + W 2 0 + C 3 2 + **/ + new_pv[0] = axises[runtime_pv[2]]; + new_pv[1] = axises[runtime_pv[1]]; + new_pv[2] = axises[runtime_pv[3]]; + new_pv[3] = axises[runtime_pv[0]]; + } + else + { + /** + mapping/axises = {rank-1 to 0} + CHW 
--------> WHC + or + WH ----------> HW + **/ + for (int id = 0; id < rank; ++id) + { + new_pv[id] = axises[runtime_pv[rank - id - 1]]; + } + } + + return ::arm_compute::PermutationVector{new_pv[0], new_pv[1], new_pv[2], new_pv[3]}; +} + +::arm_compute::TensorShape asTensorShape(const internal::tflite::operand::Shape &shape, + bool apply_dim_correction) +{ + const uint32_t rank = shape.rank(); + + ::arm_compute::TensorShape res{}; + + res.set_num_dimensions(rank); + + for (uint32_t axis = 0; axis < rank; ++axis) + { + // NOTE In some cases, in incorrect dimensions is required. + // For example, intput_size is 1 in LSTM. The input-to-input weights([num_units, input_size]) of + // LSTM is used as the weight of the FullyConnected. + // The FullyConnected's weight must be greater or equal than 2-dimensions. + // However, if the dimension correction is applied to input_to_input_weights with input_size + // equal to 1, it will be changed to 1-D. + // So input_to_input_weights is not used by the weight of FullyConnected. 
+ res.set(ToARMComputeAxis(rank, axis).value(), shape.dim(axis), apply_dim_correction); + } + + return res; +} + +::arm_compute::DataType asDataType(const int32_t type) +{ + switch (type) + { + case ANEURALNETWORKS_FLOAT32: + case ANEURALNETWORKS_TENSOR_FLOAT32: + return ::arm_compute::DataType::F32; + case ANEURALNETWORKS_INT32: + case ANEURALNETWORKS_TENSOR_INT32: + return ::arm_compute::DataType::S32; + case ANEURALNETWORKS_UINT32: + return ::arm_compute::DataType::U32; + case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM: + return ::arm_compute::DataType::QASYMM8; + default: + throw std::runtime_error("Not supported, yet"); + break; + } +} + +::arm_compute::ActivationLayerInfo asActivationInfo(FuseCode code) +{ + switch (code) + { + case ANEURALNETWORKS_FUSED_NONE: + return ::arm_compute::ActivationLayerInfo{}; + case ANEURALNETWORKS_FUSED_RELU: + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; + case ANEURALNETWORKS_FUSED_RELU1: + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f}; + case ANEURALNETWORKS_FUSED_RELU6: + return ::arm_compute::ActivationLayerInfo{ + ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f}; + default: + throw std::runtime_error("Not supported, yet"); + break; + } +} + +::arm_compute::QuantizationInfo asQuantizationInfo(const float scale, const int32_t offset) +{ + return ::arm_compute::QuantizationInfo(scale, offset); +} + +::arm_compute::TensorInfo asTensorInfo(const ::arm_compute::TensorShape &shape, const int32_t type, + const float scale, const int32_t zeroPoint) +{ + return ::arm_compute::TensorInfo(shape, 1, asDataType(type), + asQuantizationInfo(scale, zeroPoint)); +} diff --git a/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.h b/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.h index e2ceb8fef..42b547feb 100644 --- 
a/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.h +++ b/runtimes/pure_arm_compute/src/internal/arm_compute/Cast.h @@ -14,104 +14,98 @@ * limitations under the License. */ +/** + * @file Cast.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines casting functions from internal object to arm compute object + */ #ifndef __ARM_COMPUTE_CAST_H__ +#define __ARM_COMPUTE_CAST_H__ +#include <arm_compute/core/Coordinates.h> +#include <arm_compute/core/TensorInfo.h> #include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/Types.h> -#include "internal/Swizzle.h" -#include "internal/Model.h" - -inline ::arm_compute::Coordinates getARMComputeAxises(uint32_t rank) -{ - ::arm_compute::Coordinates res{}; - - res.set_num_dimensions(rank); - - for (uint32_t axis = 0; axis < rank; ++axis) - { - res.set(axis, ToARMComputeAxis(rank, axis).value()); - } - - return res; -} - -inline ::arm_compute::TensorShape asTensorShape(const internal::tflite::operand::Shape &shape, - bool apply_dim_correction = true) -{ - const uint32_t rank = shape.rank(); +#include <NeuralNetworks.h> - ::arm_compute::TensorShape res{}; - - res.set_num_dimensions(rank); - - for (uint32_t axis = 0; axis < rank; ++axis) - { - // NOTE In some cases, in incorrect dimensions is required. - // For example, intput_size is 1 in LSTM. The input-to-input weights([num_units, input_size]) of - // LSTM is used as the weight of the FullyConnected. - // The FullyConnected's weight must be greater or equal than 2-dimensions. - // However, if the dimension correction is applied to input_to_input_weights with input_size - // equal to 1, it will be changed to 1-D. - // So input_to_input_weights is not used by the weight of FullyConnected. 
- res.set(ToARMComputeAxis(rank, axis).value(), shape.dim(axis), apply_dim_correction); - } - - return res; -} +#include "internal/Model.h" -::arm_compute::DataType asDataType(const int32_t type) -{ - switch (type) - { - case ANEURALNETWORKS_FLOAT32: - case ANEURALNETWORKS_TENSOR_FLOAT32: - return ::arm_compute::DataType::F32; - case ANEURALNETWORKS_INT32: - case ANEURALNETWORKS_TENSOR_INT32: - return ::arm_compute::DataType::S32; - case ANEURALNETWORKS_UINT32: - return ::arm_compute::DataType::U32; - case ANEURALNETWORKS_TENSOR_QUANT8_ASYMM: - return ::arm_compute::DataType::QASYMM8; - default: - throw std::runtime_error("Not supported, yet"); - break; - } -} +/** + * @brief Generate arm compute coordinate object from rank + * @param[in] rank Rank number + * @return Coordinate object + */ +::arm_compute::Coordinates getARMComputeAxises(uint32_t rank); + +/** + * @brief Generate arm compute coordinate object from runtime coordinate object + * @param[in] runtime_coord Runtime coordinates object + * @param[in] axises Coordinates for axises to map runtime-coordinates to + * arm_compute-coordinates + * @return Arm_compute coordinate object + */ +::arm_compute::Coordinates asARMComputeCoordinates(const ::arm_compute::Coordinates &runtime_coord, + const ::arm_compute::Coordinates &axises); + +/** +* @brief Generate arm compute permutation vector from runtime permutation vector +* @param[in] rank Rank number supported upto 4 +* @param[in] runtime_pv Integer array for runtime permutation vector +* @return Permutation vector of arm compute +*/ +::arm_compute::PermutationVector getARMComputePermutationVector(uint32_t rank, + const int32_t *runtime_pv); +/** + * @brief Cast from shape of internal to arm compute + * @param[in] shape Internal shape object + * @param[in] apply_dim_correction Flag to state whether apply dimension correction after setting + * one dimension in arm compute + * @return TensorShape object of arm compute + */ +::arm_compute::TensorShape 
asTensorShape(const internal::tflite::operand::Shape &shape, + bool apply_dim_correction = true); -::arm_compute::ActivationLayerInfo asActivationInfo(FuseCode code) -{ - switch (code) - { - case ANEURALNETWORKS_FUSED_NONE: - return ::arm_compute::ActivationLayerInfo{}; - case ANEURALNETWORKS_FUSED_RELU: - return ::arm_compute::ActivationLayerInfo{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU}; - case ANEURALNETWORKS_FUSED_RELU1: - return ::arm_compute::ActivationLayerInfo{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f}; - case ANEURALNETWORKS_FUSED_RELU6: - return ::arm_compute::ActivationLayerInfo{ - ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.0f}; - default: - throw std::runtime_error("Not supported, yet"); - break; - } -} +/** + * @brief Cast from data type enum of NNAPI to arm compute + * @param[in] type NNAPI data type + * @return Data type of arm compute + */ +::arm_compute::DataType asDataType(const int32_t type); -::arm_compute::QuantizationInfo asQuantizationInfo(const float scale, const int32_t offset) -{ - return ::arm_compute::QuantizationInfo(scale, offset); -} +/** + * @brief Cast from NNAPI activation type enum to activation object of arm compute + * @param[in] code NNAPI activation type + * @return ActivationLayerInfo object of arm compute + */ +::arm_compute::ActivationLayerInfo asActivationInfo(FuseCode code); +/** + * @brief Generate quantization info object of arm compute + * @param[in] scale Scale of quantization + * @param[in] offset Offset of quantization + * @return QuantizationInfo object of arm compute + */ +::arm_compute::QuantizationInfo asQuantizationInfo(const float scale, const int32_t offset); + +/** + * @brief Cast from internal tensor info to tensor info object of arm compute + * @param[in] shape Tensor shape + * @param[in] type Tensor type + * @param[in] scale Scale of tensor quantization + * @param[in] zeroPoint Zeropoint of tensor 
quantization + * @return TensorInfo object of arm compute + */ ::arm_compute::TensorInfo asTensorInfo(const ::arm_compute::TensorShape &shape, const int32_t type, - const float scale = 0.0f, const int32_t zeroPoint = 0) -{ - return ::arm_compute::TensorInfo(shape, 1, asDataType(type), - asQuantizationInfo(scale, zeroPoint)); -} - + const float scale = 0.0f, const int32_t zeroPoint = 0); + +/** + * @brief Set value to arm compute tensor with casting + * @param[in] value Value to set + * @param[out] to Target tensor of arm compute + * @param[in] id Position of element + * @return N/A + */ template <typename FromT> void copyCast(const FromT value, ::arm_compute::ITensor *to, const ::arm_compute::Coordinates &id) { diff --git a/runtimes/pure_arm_compute/src/internal/arm_compute/feature/View.h b/runtimes/pure_arm_compute/src/internal/arm_compute/feature/View.h index 9d19021ae..c989ef4c2 100644 --- a/runtimes/pure_arm_compute/src/internal/arm_compute/feature/View.h +++ b/runtimes/pure_arm_compute/src/internal/arm_compute/feature/View.h @@ -14,10 +14,15 @@ * limitations under the License. 
*/ +/** + * @file View.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::arm_compute::feature::View class + */ #ifndef __INTERNAL_ARM_COMPUTE_FEATURE_VIEW_H__ #define __INTERNAL_ARM_COMPUTE_FEATURE_VIEW_H__ -#include "util/feature/Reader.h" +#include "misc/feature/Reader.h" #include <arm_compute/core/ITensor.h> @@ -28,15 +33,29 @@ namespace arm_compute namespace feature { -template <typename T> class View final : public nnfw::util::feature::Reader<T> +/** + * @brief Class to access feature's element + */ +template <typename T> class View final : public nnfw::misc::feature::Reader<T> { public: + /** + * @brief Construct a new View object + * @param[in] tensor Feature to support access + */ View(::arm_compute::ITensor *tensor) : _tensor{tensor} { // DO NOTHING } public: + /** + * @brief Get value of element in 3D feature using channel, row and column + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Value of element + */ T at(uint32_t ch, uint32_t row, uint32_t col) const override { const auto offset = feature_index_to_byte_offset(ch, row, col); @@ -46,6 +65,14 @@ public: return *ptr; } + /** + * @brief Get value of element in 4D feature using batch, channel, row and column + * @param[in] batch Batch index + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Value of element + */ T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override { const auto offset = feature_index_to_byte_offset(batch, ch, row, col); @@ -56,6 +83,13 @@ public: } public: + /** + * @brief Get reference of element in 3D feature using channel, row and column + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Reference of element + */ T &at(uint32_t ch, uint32_t row, uint32_t col) { const auto offset = feature_index_to_byte_offset(ch, row, col); @@ -65,6 +99,14 @@ public: return *ptr; } + /** + * 
@brief Get reference of element in 4D feature using batch, channel, row and column + * @param[in] batch Batch index + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Reference of element + */ T &at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { const auto offset = feature_index_to_byte_offset(batch, ch, row, col); @@ -75,12 +117,27 @@ public: } private: + /** + * @brief Get offset of element in 3D feature + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Offset of element + */ size_t feature_index_to_byte_offset(uint32_t ch, uint32_t row, uint32_t col) const { // ARM Compute uses CHW ordering return _tensor->info()->offset_element_in_bytes(::arm_compute::Coordinates{col, row, ch}); } + /** + * @brief Get offset of element in 4D feature + * @param[in] batch Batch index + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Offset of element + */ size_t feature_index_to_byte_offset(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const { // ARM Compute uses CHW ordering diff --git a/runtimes/pure_arm_compute/src/internal/arm_compute/kernel/View.h b/runtimes/pure_arm_compute/src/internal/arm_compute/kernel/View.h index 28054d7c8..399cdf913 100644 --- a/runtimes/pure_arm_compute/src/internal/arm_compute/kernel/View.h +++ b/runtimes/pure_arm_compute/src/internal/arm_compute/kernel/View.h @@ -14,11 +14,16 @@ * limitations under the License. 
*/ +/** + * @file View.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internel::arm_compute::kernel::View class + */ #ifndef __INTERNAL_ARM_COMPUTE_KERNEL_VIEW_H__ #define __INTERNAL_ARM_COMPUTE_KERNEL_VIEW_H__ -#include "util/kernel/Shape.h" -#include "util/kernel/Reader.h" +#include "misc/kernel/Shape.h" +#include "misc/kernel/Reader.h" #include <arm_compute/core/ITensor.h> @@ -29,15 +34,30 @@ namespace arm_compute namespace kernel { -template <typename T> class View final : public nnfw::util::kernel::Reader<T> +/** + * @brief Class to access kernel's element + */ +template <typename T> class View final : public nnfw::misc::kernel::Reader<T> { public: + /** + * @brief Construct a new View object + * @param[in] tensor Kernel to support access + */ View(::arm_compute::ITensor *tensor) : _tensor{tensor} { // DO NOTHING } public: + /** + * @brief Get value of element in kernel + * @param[in] nth Kernel index + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Value of element + */ T at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const override { const auto offset = kernel_index_to_byte_offset(nth, ch, row, col); @@ -48,6 +68,14 @@ public: } public: + /** + * @brief Get reference of element in kernel + * @param[in] nth Kernel index + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Reference of element + */ T &at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) { const auto offset = kernel_index_to_byte_offset(nth, ch, row, col); @@ -58,6 +86,14 @@ public: } private: + /** + * @brief Get offset of element in kernel + * @param[in] nth Kernel index + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Offset of element + */ size_t kernel_index_to_byte_offset(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const { return 
_tensor->info()->offset_element_in_bytes(::arm_compute::Coordinates{col, row, ch, nth}); diff --git a/runtimes/pure_arm_compute/src/internal/arm_compute/matrix/View.h b/runtimes/pure_arm_compute/src/internal/arm_compute/matrix/View.h index e3534294f..305fff729 100644 --- a/runtimes/pure_arm_compute/src/internal/arm_compute/matrix/View.h +++ b/runtimes/pure_arm_compute/src/internal/arm_compute/matrix/View.h @@ -14,11 +14,16 @@ * limitations under the License. */ +/** + * @file View.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::arm_compute::matrix::View class + */ #ifndef __INTERNAL_ARM_COMPUTE_MATRIX_VIEW_H__ #define __INTERNAL_ARM_COMPUTE_MATRIX_VIEW_H__ -#include "util/matrix/Shape.h" -#include "util/matrix/Reader.h" +#include "misc/matrix/Shape.h" +#include "misc/matrix/Reader.h" #include <arm_compute/core/ITensor.h> @@ -29,15 +34,28 @@ namespace arm_compute namespace matrix { -template <typename T> class View final : public nnfw::util::matrix::Reader<T> +/** + * @brief Class to access matrix's element + */ +template <typename T> class View final : public nnfw::misc::matrix::Reader<T> { public: + /** + * @brief Construct a new View object + * @param[in] tensor Matrix to support access + */ View(::arm_compute::ITensor *tensor) : _tensor{tensor} { // DO NOTHING } public: + /** + * @brief Get value of element in matrix + * @param[in] row Row index + * @param[in] col Column index + * @return Value of element + */ T at(uint32_t row, uint32_t col) const override { const auto offset = matrix_index_to_byte_offset(row, col); @@ -48,6 +66,12 @@ public: } public: + /** + * @brief Get reference of element in matrix + * @param[in] row Row index + * @param[in] col Column index + * @return Refence of element + */ T &at(uint32_t row, uint32_t col) { const auto offset = matrix_index_to_byte_offset(row, col); @@ -58,6 +82,12 @@ public: } private: + /** + * @brief Get offset of element in matrix + * @param[in] row Row index + * @param[in] col Column index + * 
@return Offset of element + */ size_t matrix_index_to_byte_offset(uint32_t row, uint32_t col) const { return _tensor->info()->offset_element_in_bytes(::arm_compute::Coordinates{col, row}); diff --git a/runtimes/pure_arm_compute/src/internal/arm_compute/tensor/View.h b/runtimes/pure_arm_compute/src/internal/arm_compute/tensor/View.h index 0d8f2ab81..372bd682d 100644 --- a/runtimes/pure_arm_compute/src/internal/arm_compute/tensor/View.h +++ b/runtimes/pure_arm_compute/src/internal/arm_compute/tensor/View.h @@ -14,11 +14,16 @@ * limitations under the License. */ +/** + * @file View.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::arm_compute::tensor::View class + */ #ifndef __INTERNAL_ARM_COMPUTE_TENSOR_VIEW_H__ #define __INTERNAL_ARM_COMPUTE_TENSOR_VIEW_H__ -#include "util/tensor/Shape.h" -#include "util/tensor/Index.h" +#include "misc/tensor/Shape.h" +#include "misc/tensor/Index.h" #include <arm_compute/core/ITensor.h> @@ -29,16 +34,28 @@ namespace arm_compute namespace tensor { +/** + * @brief Class to access tensor's element + */ template <typename T> class View { public: + /** + * @brief Construct a new View object + * @param[in] tensor Tensor to support access + */ View(::arm_compute::ITensor *tensor) : _tensor{tensor} { // DO NOTHING } private: - uint32_t byte_offset_of(const nnfw::util::tensor::Index &index) const + /** + * @brief Get offset of element in tensor + * @param[in] index Index of element + * @return Offset of element + */ + uint32_t byte_offset_of(const nnfw::misc::tensor::Index &index) const { // NOTE index.rank() >= _tensor->info()->num_dimensions() should hold here const uint32_t rank = index.rank(); @@ -56,7 +73,12 @@ private: } public: - T at(const nnfw::util::tensor::Index &index) const + /** + * @brief Get value of element in tensor + * @param[in] index Index of element + * @return Value of element + */ + T at(const nnfw::misc::tensor::Index &index) const { const auto offset = byte_offset_of(index); @@ -65,7 +87,12 @@ 
public: return *ptr; } - T &at(const nnfw::util::tensor::Index &index) + /** + * @brief Get reference of element in tensor + * @param[in] index Index of element + * @return Reference of element + */ + T &at(const nnfw::misc::tensor::Index &index) { const auto offset = byte_offset_of(index); diff --git a/runtimes/pure_arm_compute/src/internal/layers/FeatureLoggingLayer.h b/runtimes/pure_arm_compute/src/internal/layers/FeatureLoggingLayer.h index 502a1ee0e..83ae7c17b 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/FeatureLoggingLayer.h +++ b/runtimes/pure_arm_compute/src/internal/layers/FeatureLoggingLayer.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file       FeatureLoggingLayer.h + * @brief      This file contains FeatureLoggingLayer class + * @ingroup    COM_AI_RUNTIME + */ + #ifndef __FEATURE_LOGGING_LAYER_H__ #define __FEATURE_LOGGING_LAYER_H__ @@ -27,9 +33,24 @@ #include "internal/arm_compute.h" +/** + * @brief Class to run FeatureLogging Layer + */ class FeatureLoggingLayer : public ::arm_compute::IFunction { public: + FeatureLoggingLayer(void) : _tag(""), _target(nullptr) + { + // DO NOTHING + } + +public: + /** + * @brief Configure the layer + * @param[in] tag Text tag for this layer + * @param[in] target The feature tensor to be printed + * @return N/A + */ void configure(const std::string &tag, ::arm_compute::ITensor *target) { _tag = tag; @@ -37,6 +58,10 @@ public: } public: + /** + * @brief Run the operation. Must be called after configure(). 
+ * @return N/A + */ void run(void) override { if (::internal::arm_compute::isGpuMode()) diff --git a/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.cc index 311284efc..28789a801 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.cc +++ b/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.cc @@ -17,8 +17,6 @@ #include "GenericFullyConnectedLayer.h" #include "internal/arm_compute.h" -#include <arm_compute/core/Helpers.h> - void GenericFullyConnectedLayer::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *weights, ::arm_compute::ITensor *biases, @@ -56,9 +54,9 @@ void GenericFullyConnectedLayer::configure(::arm_compute::ITensor *input, { // reshape auto_init_if_empty(*_neon_buffer.info(), _input->info()->clone()->set_tensor_shape(reshape)); - _generic_reshape.configure(CAST_NE(_input), &_neon_buffer); + _generic_reshape.configure(_input, &_neon_buffer); - _neon_fc.configure(&_neon_buffer, CAST_NE(_weights), CAST_NE(_biases), CAST_NE(_output)); + _neon_fc.configure(&_neon_buffer, _weights, _biases, _output); // NOTE _neon_buffer is inaccessible from outside, and thus it is safe to invoke allocate // here. @@ -66,7 +64,7 @@ void GenericFullyConnectedLayer::configure(::arm_compute::ITensor *input, } else { - _neon_fc.configure(CAST_NE(_input), CAST_NE(_weights), CAST_NE(_biases), CAST_NE(_output)); + _neon_fc.configure(_input, _weights, _biases, _output); } } } diff --git a/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.h b/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.h index 55d8683da..f1519f54d 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.h +++ b/runtimes/pure_arm_compute/src/internal/layers/GenericFullyConnectedLayer.h @@ -14,23 +14,52 @@ * limitations under the License. 
*/ +/** + * @file       GenericFullyConnectedLayer.h + * @brief      This file contains GenericFullyConnectedLayer class + * @ingroup    COM_AI_RUNTIME + */ + #ifndef __GENERIC_FULLY_CONNECTED_LAYER_H__ #define __GENERIC_FULLY_CONNECTED_LAYER_H__ -#include <arm_compute/runtime/Tensor.h> -#include <arm_compute/runtime/CL/CLTensor.h> #include <arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h> #include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h> #include "internal/layers/GenericReshapeLayer.h" +/** + * @brief Class to run FullyConnected Layer with both CPU and GPU + */ class GenericFullyConnectedLayer : public ::arm_compute::IFunction { public: + GenericFullyConnectedLayer(void) + : _input(nullptr), _weights(nullptr), _biases(nullptr), _output(nullptr), _cl_buffer{}, + _neon_buffer{}, _cl_fc{}, _neon_fc{}, _generic_reshape{}, _needs_reshape(false) + { + // DO NOTHING + } + +public: + /** + * @brief Configure the layer + * @param[in] input The source tensor + * @param[in] weights The tensor that is filled with weight values + * @param[in] biases The tensor that is filled with biase values + * @param[in] output The destination tensor + * @param[in] needs_reshape Whether it needs to be reshaped or not + * @param[in] reshape The tensor shape to be reshaped. Only valid when needs_reshape is true. + * @return N/A + */ void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *weights, ::arm_compute::ITensor *biases, ::arm_compute::ITensor *output, bool needs_reshape, ::arm_compute::TensorShape reshape); public: + /** + * @brief Run the operation. Must be called after configure(). 
+ * @return N/A + */ void run(void) override; private: diff --git a/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.cc index 2cdfe1b6e..c38c2e9e3 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.cc +++ b/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.cc @@ -43,8 +43,8 @@ void GenericReshapeLayer::configure(::arm_compute::ITensor *input, ::arm_compute } else { - _neon_permute.configure(CAST_NE(input), &_neon_permuted, pv); - _neon_reshape.configure(&_neon_permuted, CAST_NE(output)); + _neon_permute.configure(input, &_neon_permuted, pv); + _neon_reshape.configure(&_neon_permuted, output); // NOTE _permuted is inaccessible from outside, and thus it is safe to invoke allocate here. _neon_permuted.allocator()->allocate(); diff --git a/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.h b/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.h index 1def21085..a22c14c8b 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.h +++ b/runtimes/pure_arm_compute/src/internal/layers/GenericReshapeLayer.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file       GenericReshapeLayer.h + * @brief      This file contains GenericReshapeLayer class + * @ingroup    COM_AI_RUNTIME + */ + #ifndef __GENERIC_RESHAPE_LAYER_H__ #define __GENERIC_RESHAPE_LAYER_H__ @@ -25,12 +31,33 @@ #include <arm_compute/runtime/NEON/functions/NEPermute.h> #include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h> +/** + * @brief Class to run Reshape Layer with both CPU and GPU + */ class GenericReshapeLayer : public ::arm_compute::IFunction { public: + GenericReshapeLayer(void) + : _input(nullptr), _output(nullptr), _cl_permuted{}, _neon_permuted{}, _cl_permute{}, + _cl_reshape{}, _neon_permute{}, _neon_reshape{} + { + // DO NOTHING + } + +public: + /** + * @brief Configure the layer + * @param[in] input The source tensor + * @param[in] output The destination tensor + * @return N/A + */ void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output); public: + /** + * @brief Run the operation. Must be called after configure(). + * @return N/A + */ void run(void) override; private: diff --git a/runtimes/pure_arm_compute/src/internal/layers/PadLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/PadLayer.cc deleted file mode 100644 index 4a5370587..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/PadLayer.cc +++ /dev/null @@ -1,78 +0,0 @@ -/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <iostream>
-#include "PadLayer.h"
-#include <arm_compute/runtime/CL/CLScheduler.h>
-
-void PadLayer::configure(::arm_compute::ICLTensor *input, ::arm_compute::ICLTensor *output,
- unsigned int border_width)
-{
- _input = input;
- _output = output;
- _border_width = border_width;
- _output_height = _output->info()->dimension(0);
- _output_width = _output->info()->dimension(1);
-
- uint8_t constant_border_value = 0;
- ::arm_compute::PixelValue constant_pixel_value = ::arm_compute::PixelValue(constant_border_value);
-
- unsigned int padding_size = _border_width;
- input->info()->extend_padding(::arm_compute::PaddingSize{padding_size});
- _fillborderkernel.configure(input, _border_width, ::arm_compute::BorderMode::CONSTANT,
- constant_pixel_value);
-}
-
-void PadLayer::run(void)
-{
- _fillborderkernel.run();
-
- ::arm_compute::Coordinates coordinates =
- ::arm_compute::Coordinates(-_border_width, -_border_width);
- ::arm_compute::TensorShape new_tensor_shape =
- ::arm_compute::TensorShape(_output_height, _output_width);
-
- /* NOTE: The cl kernel fills the data in the borders(not in the tensor).
- Once the tensor is received back at NNAPI, we are adjusting
- the valid region in such a way that the padding becomes part of the tensor itself
- and matches the size of output. */
- _input->info()->set_valid_region(::arm_compute::ValidRegion(coordinates, new_tensor_shape));
-
- /* NOTE: Since cl kernel does not have an argument for output tensor while NNAPI does.
- We need to map the input (tensor that is passed to the cl kernel) back to
- output. */
-
- // TODO: Write a modified CLCopy kernel to do this job.
- populateOutput();
-}
-
-void PadLayer::populateOutput()
-{
- auto &queue = ::arm_compute::CLScheduler::get().queue();
- _input->map(queue);
- _output->map(queue);
-
- auto input_tensor = static_cast<::arm_compute::ITensor *>(_input);
- auto const source_data = input_tensor->buffer();
-
- auto output_tensor = static_cast<::arm_compute::ITensor *>(_output);
- auto dst_data = output_tensor->buffer();
-
- memmove(dst_data, source_data, _output_height * _output_width * 4);
-
- _input->unmap(queue);
- _output->unmap(queue);
-}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.cc new file mode 100644 index 000000000..6d348e814 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.cc @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/layers/SimpleArgMinMax.h" +#include <arm_compute/runtime/CL/CLScheduler.h> + +void SimpleArgMinMax::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, + std::vector<uint32_t> axis, ::arm_compute::ArgOperation op) +{ + _input = input; + _output = output; + _axis = axis; + _input_rank = input->info()->num_dimensions(); + _op_type = op; +} + +inline const ::arm_compute::TensorShape +inferOutputShape(const ::arm_compute::TensorShape &input_shape, const std::vector<uint32_t> &axis, + int input_rank) +{ + ::arm_compute::TensorShape out_shape{}; + size_t dim = 1; + for (int i = 0; i < input_rank; ++i) + { + dim = input_shape[i]; + out_shape.set(i, dim); + } + + for (int i = 0; i < axis.size(); ++i) + { + out_shape.set(axis[i], 1); + } + + return out_shape; +} + +template <typename T> +inline T getArgMinMaxEle(const ::arm_compute::ITensor *input, + const ::arm_compute::TensorShape &input_shape, + const ::arm_compute::TensorShape &output_shape, const size_t b, + const size_t d, const size_t h, const 
size_t w, const int axis, + const ::arm_compute::ArgOperation op_type) +{ + // If output[dimention] == 1, will check all values of that dimension because of reducing + // dimension. + // Else will check only one value. + const size_t start_b = output_shape[3] == 1 ? 0 : b; + const size_t start_d = output_shape[2] == 1 ? 0 : d; + const size_t start_h = output_shape[1] == 1 ? 0 : h; + const size_t start_w = output_shape[0] == 1 ? 0 : w; + const size_t stop_b = output_shape[3] == 1 ? input_shape[3] - 1 : b; + const size_t stop_d = output_shape[2] == 1 ? input_shape[2] - 1 : d; + const size_t stop_h = output_shape[1] == 1 ? input_shape[1] - 1 : h; + const size_t stop_w = output_shape[0] == 1 ? input_shape[0] - 1 : w; + + ::arm_compute::Coordinates id{w, h, d, b}; + ::arm_compute::Coordinates min_max_id{w, h, d, b}; + + T value = *reinterpret_cast<T *>(input->ptr_to_element(id)); + T tval = *reinterpret_cast<T *>(input->ptr_to_element(id)); + + for (size_t in_b = start_b; in_b <= stop_b; ++in_b) + { + id.set(3, in_b); + for (size_t in_d = start_d; in_d <= stop_d; ++in_d) + { + id.set(2, in_d); + for (size_t in_h = start_h; in_h <= stop_h; ++in_h) + { + id.set(1, in_h); + for (size_t in_w = start_w; in_w <= stop_w; ++in_w) + { + id.set(0, in_w); + if (op_type == ::arm_compute::ArgOperation::MIN) + { + value = std::min<T>(value, *reinterpret_cast<T *>(input->ptr_to_element(id))); + } + else if (op_type == ::arm_compute::ArgOperation::MAX) + { + value = std::max<T>(value, *reinterpret_cast<T *>(input->ptr_to_element(id))); + } + else + throw std::runtime_error("This Arg operation is not supported, yet"); + + if (tval != value) + { + min_max_id = id; + tval = value; + } + } + } + } + } + + return min_max_id[axis]; +} + +template <typename T> +inline void +getArgMinMax(const ::arm_compute::ITensor *input, const ::arm_compute::TensorShape &input_shape, + const ::arm_compute::TensorShape &output_shape, ::arm_compute::ITensor *output, + const int axis, const 
::arm_compute::ArgOperation op_type) +{ + ::arm_compute::Coordinates id; + for (size_t out_b = 0; out_b < output_shape[3]; ++out_b) + { + id.set(3, out_b); + for (size_t out_d = 0; out_d < output_shape[2]; ++out_d) + { + id.set(2, out_d); + for (size_t out_h = 0; out_h < output_shape[1]; ++out_h) + { + id.set(1, out_h); + for (size_t out_w = 0; out_w < output_shape[0]; ++out_w) + { + id.set(0, out_w); + *reinterpret_cast<int *>(output->ptr_to_element(id)) = getArgMinMaxEle<T>( + input, input_shape, output_shape, out_b, out_d, out_h, out_w, axis, op_type); + } + } + } + } +} + +void SimpleArgMinMax::run() +{ + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->map(q); + CAST_CL(_output)->map(q); + } + + ::arm_compute::TensorShape input_shape = _input->info()->tensor_shape(); + + // Axis dimension is 1 and size is 1. + // TODO support axis size > 1. + int axis_val = _axis[0]; + ::arm_compute::TensorShape output_shape = inferOutputShape(input_shape, _axis, _input_rank); + + _output->info()->set_tensor_shape(output_shape); + switch (_input->info()->data_type()) + { + case ::arm_compute::DataType::QASYMM8: + getArgMinMax<uint8_t>(_input, input_shape, output_shape, _output, axis_val, _op_type); + break; + case ::arm_compute::DataType::S32: + getArgMinMax<int32_t>(_input, input_shape, output_shape, _output, axis_val, _op_type); + break; + case ::arm_compute::DataType::F32: + getArgMinMax<float>(_input, input_shape, output_shape, _output, axis_val, _op_type); + break; + default: + ARM_COMPUTE_ERROR("DataType not supported"); + break; + } + + _output->info()->set_tensor_shape(output_shape); + + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->unmap(q); + CAST_CL(_output)->unmap(q); + } +} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.h new 
file mode 100644 index 000000000..b90e74579 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleArgMinMax.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __SIMPLE_ARG_MIN_MAX_H__ +#define __SIMPLE_ARG_MIN_MAX_H__ + +#include "internal/arm_compute.h" +#include "arm_compute/core/TypesEx.h" + +class SimpleArgMinMax : public ::arm_compute::IFunction +{ +public: + SimpleArgMinMax(void) : _input(nullptr), _output(nullptr), _axis(), _input_rank(0) + { + // DO NOTHING + } + +public: + /** Initialise input and output + * + * @param[in] input First tensor input. + * @param[out] output Output tensor. + * @param[in] axis Dimension along which to find Min or Max Index. 
+ */ + void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, + std::vector<uint32_t> axis, ::arm_compute::ArgOperation _op_type); + + void run() override; + +private: + ::arm_compute::ITensor *_input; + ::arm_compute::ITensor *_output; + std::vector<uint32_t> _axis; + int _input_rank; + ::arm_compute::ArgOperation _op_type; +}; + +#endif /*__SIMPLE_ARG_MIN_MAX_H__ */ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAddition.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAddition.h index 31c927b4f..aed9ae286 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAddition.h +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleArithmeticAddition.h @@ -14,15 +14,36 @@ * limitations under the License. */ +/** + * @file       SimpleArithmeticAddition.h + * @brief      This file contains SimpleArithmeticAddition class + * @ingroup    COM_AI_RUNTIME + */ + #ifndef __SIMPLE_ARITHMETIC_ADDITION_H__ #define __SIMPLE_ARITHMETIC_ADDITION_H__ #include "internal/arm_compute.h" #include <arm_compute/core/ITensor.h> +/** + * @brief Class to run SimpleArithmeticAddition Layer + */ class SimpleArithmeticAddition : public ::arm_compute::IFunction { public: + SimpleArithmeticAddition(void) : _lhs(nullptr), _rhs(nullptr), _out(nullptr) + { + // DO NOTHING + } + + /** + * @brief Configure the layer + * @param[in] lhs Lefthand-side operand + * @param[in] rhs Righthand-side operand + * @param[in] out The destination tensor(Result operand) + * @return N/A + */ void configure(::arm_compute::ITensor *lhs, ::arm_compute::ITensor *rhs, ::arm_compute::ITensor *out) { @@ -32,6 +53,10 @@ public: } public: + /** + * @brief Run the operation. Must be called after configure(). 
+ * @return N/A + */ void run(void) override { if (::internal::arm_compute::isGpuMode()) diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.cc new file mode 100644 index 000000000..87175ee1a --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.cc @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "internal/layers/SimpleBatchToSpaceNd.h" + +#include <arm_compute/runtime/CL/CLScheduler.h> + +void SimpleBatchToSpaceND::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, + const int32_t *block_size, + const ::arm_compute::Coordinates &axises) +{ + const auto rank = axises.num_dimensions(); + assert(rank == 4); + + for (int i = 0; i < rank; ++i) + assert(axises[i] >= 0 && axises[i] < rank); + + _input = input; + _output = output; + _block_size = block_size; + _axises = axises; +} + +template <typename T> +inline void BatchToSpaceND(const ::arm_compute::ITensor *input, + const ::arm_compute::TensorShape &input_shape, + const int32_t *block_size_data, ::arm_compute::ITensor *output, + const ::arm_compute::TensorShape &output_shape, + const ::arm_compute::Coordinates &axises) +{ + const int output_batch = output_shape[axises[0]]; + const int output_height = output_shape[axises[1]]; + const int output_width = output_shape[axises[2]]; + const int depth = output_shape[axises[3]]; + + for (int out_b = 0; out_b < output_batch; ++out_b) + { + for (int out_h = 0; out_h < output_height; ++out_h) + { + for (int out_w = 0; out_w < output_width; ++out_w) + { + for (int out_d = 0; out_d < depth; ++out_d) + { + const int in_d = out_d; + const int in_h = out_h / block_size_data[0]; + const int in_w = out_w / block_size_data[1]; + const int in_b = + out_b + + ((out_h % block_size_data[0]) * block_size_data[1] + out_w % block_size_data[1]) * + output_batch; + + auto input_id = + asARMComputeCoordinates(::arm_compute::Coordinates{in_b, in_h, in_w, in_d}, axises); + auto output_id = asARMComputeCoordinates( + ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises); + + *reinterpret_cast<T *>(output->ptr_to_element(output_id)) = + *reinterpret_cast<T *>(input->ptr_to_element(input_id)); + } + } + } + } +} +void SimpleBatchToSpaceND::run() +{ + if (::internal::arm_compute::isGpuMode()) + { + auto &q = 
::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->map(q); + CAST_CL(_output)->map(q); + } + + switch (_input->info()->data_type()) + { + case ::arm_compute::DataType::U8: + case ::arm_compute::DataType::QASYMM8: + BatchToSpaceND<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _output, + _output->info()->tensor_shape(), _axises); + break; + case ::arm_compute::DataType::F32: + BatchToSpaceND<float>(_input, _input->info()->tensor_shape(), _block_size, _output, + _output->info()->tensor_shape(), _axises); + break; + default: + ARM_COMPUTE_ERROR("DataType not supported"); + break; + } + + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->unmap(q); + CAST_CL(_output)->unmap(q); + } +} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.h new file mode 100644 index 000000000..5695d9719 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleBatchToSpaceNd.h @@ -0,0 +1,51 @@ +/* + *Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __SIMPLE_BATCH_TO_SPACE_ND_H__ +#define __SIMPLE_BATCH_TO_SPACE_ND_H__ + +#include "internal/arm_compute.h" +#include "internal/arm_compute/Cast.h" + +class SimpleBatchToSpaceND : public ::arm_compute::IFunction +{ +public: + SimpleBatchToSpaceND(void) : _input(nullptr), _output(nullptr), _block_size(nullptr), _axises{} + { + // DO NOTHING + } + + /** Initialise input and output + * + * @param[in] input First tensor input. + * @param[out] output Output tensor. + * @param[in] block_size Block size. + * @param[in] axises Axises of rank 4 + */ + void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, + const int32_t *block_size, + const ::arm_compute::Coordinates &axises = getARMComputeAxises(4)); + + void run() override; + +private: + ::arm_compute::ITensor *_input; + ::arm_compute::ITensor *_output; + const int32_t *_block_size; + ::arm_compute::Coordinates _axises; +}; + +#endif /*__SIMPLE_BATCH_TO_SPACE_ND_H__ */ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.cc new file mode 100644 index 000000000..7c7706a78 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.cc @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "internal/layers/SimpleCastLayer.h" + +#include <arm_compute/runtime/CL/CLScheduler.h> + +void SimpleCastLayer::castData(::arm_compute::ITensor *in, ::arm_compute::ITensor *out, + const arm_compute::Coordinates &id) +{ + switch (in->info()->data_type()) + { + case ::arm_compute::DataType::F32: + { + copyCast(*reinterpret_cast<float *>(in->ptr_to_element(id)), out, id); + break; + } + case ::arm_compute::DataType::S32: + { + copyCast(*reinterpret_cast<int32_t *>(in->ptr_to_element(id)), out, id); + break; + } + case ::arm_compute::DataType::U32: + { + copyCast(*reinterpret_cast<uint32_t *>(in->ptr_to_element(id)), out, id); + break; + } + case ::arm_compute::DataType::QASYMM8: + { + const uint8_t quantizedValue = *(in->ptr_to_element(id)); + copyCast(in->info()->quantization_info().dequantize(quantizedValue), out, id); + break; + } + default: + throw std::runtime_error("Not supported, yet"); + break; + } +} + +void SimpleCastLayer::configure(::arm_compute::ITensor *in, ::arm_compute::ITensor *out) +{ + _in = in; + _out = out; +} + +void SimpleCastLayer::run(void) +{ + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + CAST_CL(_in)->map(q); + CAST_CL(_out)->map(q); + } + + arm_compute::Window window; + window.use_tensor_dimensions(_out->info()->tensor_shape()); + + execute_window_loop(window, + [this](const arm_compute::Coordinates &id) { castData(_in, _out, id); }); + + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + CAST_CL(_out)->unmap(q); + CAST_CL(_in)->unmap(q); + } +} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.h index fa3006438..f9a48b481 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.h +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleCastLayer.h @@ -14,80 +14,55 @@ * limitations under the License. 
*/ +/** + * @file       SimpleCastLayer.h + * @brief      This file contains SimpleCastLayer class + * @ingroup    COM_AI_RUNTIME + */ + #ifndef __SIMPLE_CAST_LAYER_H__ #define __SIMPLE_CAST_LAYER_H__ -#include <arm_compute/core/ITensor.h> - #include "internal/arm_compute.h" -#include "internal/op/Cast.h" +#include "internal/arm_compute/Cast.h" +/** + * @brief Class to run SimpleCast Layer + */ class SimpleCastLayer : public ::arm_compute::IFunction { public: - void configure(::arm_compute::ITensor *in, ::arm_compute::ITensor *out) + SimpleCastLayer(void) : _in(nullptr), _out(nullptr) { - _in = in; - _out = out; + // DO NOTHING } -public: - void run(void) override - { - if (::internal::arm_compute::isGpuMode()) - { - auto &q = ::arm_compute::CLScheduler::get().queue(); - CAST_CL(_in)->map(q); - CAST_CL(_out)->map(q); - } - - arm_compute::Window window; - window.use_tensor_dimensions(_out->info()->tensor_shape()); + /** + * @brief Configure the layer + * @param[in] in The source tensor + * @param[in] out The destination tensor + * @return N/A + */ + void configure(::arm_compute::ITensor *in, ::arm_compute::ITensor *out); - execute_window_loop(window, - [this](const arm_compute::Coordinates &id) { castData(_in, _out, id); }); - - if (::internal::arm_compute::isGpuMode()) - { - auto &q = ::arm_compute::CLScheduler::get().queue(); - CAST_CL(_out)->unmap(q); - CAST_CL(_in)->unmap(q); - } - } + /** + * @brief Run the operation. Must be called after configure(). 
+ * @return N/A + */ + void run(void) override; +private: + /** + * @brief Cast and copy data from one tensor to another + * + * @param[in] in The source tensor + * @param[out] out The destination tensor + * @param[in] id Coordinates to copy + * @return N/A + */ void castData(::arm_compute::ITensor *in, ::arm_compute::ITensor *out, - const arm_compute::Coordinates &id) - { - switch (in->info()->data_type()) - { - case ::arm_compute::DataType::F32: - { - copyCast(*reinterpret_cast<float *>(in->ptr_to_element(id)), out, id); - break; - } - case ::arm_compute::DataType::S32: - { - copyCast(*reinterpret_cast<int32_t *>(in->ptr_to_element(id)), out, id); - break; - } - case ::arm_compute::DataType::U32: - { - copyCast(*reinterpret_cast<uint32_t *>(in->ptr_to_element(id)), out, id); - break; - } - case ::arm_compute::DataType::QASYMM8: - { - const uint8_t quantizedValue = *(in->ptr_to_element(id)); - copyCast(in->info()->quantization_info().dequantize(quantizedValue), out, id); - break; - } - default: - throw std::runtime_error("Not supported, yet"); - break; - } - } + const arm_compute::Coordinates &id); -private: ::arm_compute::ITensor *_in; ::arm_compute::ITensor *_out; }; diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc new file mode 100644 index 000000000..d62a8321b --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/layers/SimpleDepthToSpace.h" + +#include <arm_compute/runtime/CL/CLScheduler.h> + +void SimpleDepthToSpace::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, + int32_t block_size, const ::arm_compute::Coordinates &axises) +{ + const auto rank = axises.num_dimensions(); + assert(rank == 4); + for (int i = 0; i < rank; ++i) + { + assert(axises[i] >= 0); + assert(axises[i] < rank); + } + + _input = input; + _output = output; + _block_size = block_size; + _axises = axises; +} + +template <typename T> +inline void DepthToSpace(const ::arm_compute::ITensor *input, + const ::arm_compute::TensorShape &input_shape, int32_t block_size, + ::arm_compute::ITensor *output, + const ::arm_compute::TensorShape &output_shape, + const ::arm_compute::Coordinates &axises) +{ + const int output_batch = output_shape[axises[0]]; + const int output_height = output_shape[axises[1]]; + const int output_width = output_shape[axises[2]]; + const int output_depth = output_shape[axises[3]]; + + for (int out_b = 0; out_b < output_batch; ++out_b) + { + for (int out_h = 0; out_h < output_height; ++out_h) + { + for (int out_w = 0; out_w < output_width; ++out_w) + { + for (int out_d = 0; out_d < output_depth; ++out_d) + { + const int in_b = out_b; + const int in_h = out_h / block_size; + const int in_w = out_w / block_size; + const int in_d = + out_d + ((out_h % block_size) * block_size + out_w % block_size) * output_depth; + + auto input_id = + asARMComputeCoordinates(::arm_compute::Coordinates{in_b, in_h, in_w, in_d}, axises); + auto 
output_id = asARMComputeCoordinates( + ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises); + + *reinterpret_cast<T *>(output->ptr_to_element(output_id)) = + *reinterpret_cast<T *>(input->ptr_to_element(input_id)); + } + } + } + } +} + +void SimpleDepthToSpace::run() +{ + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->map(q); + CAST_CL(_output)->map(q); + } + + switch (_input->info()->data_type()) + { + case ::arm_compute::DataType::U8: + case ::arm_compute::DataType::QASYMM8: + DepthToSpace<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _output, + _output->info()->tensor_shape(), _axises); + break; + case ::arm_compute::DataType::F32: + DepthToSpace<float>(_input, _input->info()->tensor_shape(), _block_size, _output, + _output->info()->tensor_shape(), _axises); + break; + default: + ARM_COMPUTE_ERROR("DataType not supported"); + break; + } + + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->unmap(q); + CAST_CL(_output)->unmap(q); + } +} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h new file mode 100644 index 000000000..1032aaa47 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __SIMPLE_DEPTH_TO_SPACE_H__ +#define __SIMPLE_DEPTH_TO_SPACE_H__ + +#include "internal/arm_compute.h" +#include "internal/arm_compute/Cast.h" + +class SimpleDepthToSpace : public ::arm_compute::IFunction +{ +public: + SimpleDepthToSpace(void) : _input(nullptr), _output(nullptr), _block_size(0), _axises{} + { + // DO NOTHING + } + +public: + /** Initialise input and output + * + * @param[in] input First tensor input. + * @param[out] output Output tensor. + * @param[in] block_size Block size. + * @param[in] axises Axises of rank 4 + */ + void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, int32_t block_size, + const ::arm_compute::Coordinates &axises = getARMComputeAxises(4)); + + void run() override; + +private: + ::arm_compute::ITensor *_input; + ::arm_compute::ITensor *_output; + int32_t _block_size; + ::arm_compute::Coordinates _axises; +}; + +#endif /*__SIMPLE_DEPTH_TO_SPACE_H__ */ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc index 089c783c1..ae740bb10 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.cc @@ -1,3 +1,18 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ #include "internal/layers/SimpleEmbeddingLookup.h" #include <arm_compute/runtime/CL/CLScheduler.h> @@ -6,7 +21,8 @@ void SimpleEmbeddingLookup::configure(::arm_compute::ITensor *lookups, ::arm_compute::ITensor *values, ::arm_compute::ITensor *output) { - // Assume that verification of operands are already done at Planner::visit() + assert(values->info()->num_dimensions() == output->info()->num_dimensions()); + assert(values->info()->num_dimensions() > 1 && values->info()->num_dimensions() <= 4); _lookups = lookups; _values = values; _output = output; @@ -25,85 +41,62 @@ void SimpleEmbeddingLookup::run() // type of elements of lookups is always integer const int32_t *lookups_buf = reinterpret_cast<int32_t *>(_lookups->buffer()); - const auto values_buf = _values->buffer(); - auto output_buf = _output->buffer(); const auto lookups_info = _lookups->info(); const auto values_info = _values->info(); const auto output_info = _output->info(); - // TODO Refactor below duplicated code! - const auto values_rank = values_info->num_dimensions(); - switch (values_rank) + // NOTE The first dimension's position is always at the end of dimensions. 
+ const auto first_dim_pos = values_info->num_dimensions() - 1; + + const size_t first_dim = values_info->dimension(first_dim_pos); + for (size_t i = 0; i < lookups_info->dimension(0); ++i) { - case 2: - // (H,W) in nnapi -> (W,H) in acl - { - const size_t row_size = values_info->dimension(1); - const size_t row_bytes = values_info->total_size() / row_size; - for (size_t i = 0; i < lookups_info->dimension(0); ++i) - { - if (lookups_buf[i] < 0 || lookups_buf[i] >= row_size) - throw std::runtime_error("Embedding Lookup: index out of bounds."); - - size_t idx = lookups_buf[i]; - size_t row_offset_by_idx = values_info->offset_element_in_bytes({0, idx}); - size_t row_offset_by_i = output_info->offset_element_in_bytes({0, i}); - - unsigned char *sink_addr = output_buf + row_offset_by_i; - unsigned char *source_addr = values_buf + row_offset_by_idx; - memcpy(sink_addr, source_addr, row_bytes); - } - } - break; - case 3: - // (B,H,W) in nnapi -> (W,H,B) in acl - { - const size_t row_size = values_info->dimension(2); - const size_t row_bytes = values_info->total_size() / row_size; - for (size_t i = 0; i < lookups_info->dimension(0); ++i) - { - if (lookups_buf[i] < 0 || lookups_buf[i] >= row_size) - throw std::runtime_error("Embedding Lookup: index out of bounds."); - - size_t idx = lookups_buf[i]; - size_t row_offset_by_idx = values_info->offset_element_in_bytes({0, 0, idx}); - size_t row_offset_by_i = output_info->offset_element_in_bytes({0, 0, i}); - - unsigned char *sink_addr = output_buf + row_offset_by_i; - unsigned char *source_addr = values_buf + row_offset_by_idx; - memcpy(sink_addr, source_addr, row_bytes); - } - } - break; - case 4: - // (N,H,W,C) in nnapi -> (N,C,H,W) in acl - { - const size_t row_size = values_info->dimension(3); - const size_t row_bytes = values_info->total_size() / row_size; - for (size_t i = 0; i < lookups_info->dimension(0); ++i) - { - if (lookups_buf[i] < 0 || lookups_buf[i] >= row_size) - throw std::runtime_error("Embedding Lookup: index 
out of bounds."); - - size_t idx = lookups_buf[i]; - size_t row_offset_by_idx = values_info->offset_element_in_bytes({0, 0, 0, idx}); - size_t row_offset_by_i = output_info->offset_element_in_bytes({0, 0, 0, i}); - - unsigned char *sink_addr = output_buf + row_offset_by_i; - unsigned char *source_addr = values_buf + row_offset_by_idx; - memcpy(sink_addr, source_addr, row_bytes); - } - } - break; - case 1: - // In this case, shape of values actually is matrix but the height(row size) is 1 in acl. If - // row size is 1, this op is not needed and it means this situtation could be wrong. - throw std::runtime_error("Wrong usage of EmbeddingLookup op!"); - default: - throw std::runtime_error("Not supported rank!"); + if (lookups_buf[i] < 0 || lookups_buf[i] >= first_dim) + throw std::runtime_error("Embedding Lookup: index out of bounds."); } + // If each strides of values and output are different, applied padding size of the two tensors are + // different, therefore, it can not be copied at once. 
+  auto can_copy_at_once = [&]() -> bool {
+    const auto &values_strides = values_info->strides_in_bytes();
+    const auto &output_strides = output_info->strides_in_bytes();
+
+    for (size_t i = 0; i < first_dim_pos; ++i)
+    {
+      if (values_strides[i] != output_strides[i])
+        return false;
+    }
+
+    return true;
+  };
+
+  using ::arm_compute::Window;
+  using ::arm_compute::Iterator;
+
+  size_t copy_bytes;
+  Window window;
+  if (can_copy_at_once())
+  {
+    copy_bytes = values_info->total_size() / first_dim;
+    window.use_tensor_dimensions(output_info->tensor_shape(), first_dim_pos);
+  }
+  else
+  {
+    copy_bytes = values_info->dimension(0) * values_info->element_size();
+    window.use_tensor_dimensions(output_info->tensor_shape(), Window::DimY);
+  }
+
+  Iterator it(_output, window);
+  execute_window_loop(window,
+                      [&](const ::arm_compute::Coordinates &id) {
+                        ::arm_compute::Coordinates values_id = id;
+                        const int idx = id[first_dim_pos];
+                        values_id.set(first_dim_pos, lookups_buf[idx]);
+                        memcpy(it.ptr(), _values->ptr_to_element(values_id), copy_bytes);
+                      },
+                      it);
+
   if (::internal::arm_compute::isGpuMode())
   {
     auto &q = ::arm_compute::CLScheduler::get().queue();
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.h
index 9f2cd977f..fd499437f 100644
--- a/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.h
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleEmbeddingLookup.h
@@ -1,16 +1,55 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #ifndef __SIMPLE_EMBEDDING_LOOKUP_H__ #define __SIMPLE_EMBEDDING_LOOKUP_H__ #include "internal/arm_compute.h" -#include <arm_compute/core/ITensor.h> -#include <arm_compute/runtime/IFunction.h> +/** + * @file       SimpleEmbeddingLookup.h + * @brief      This file contains SimpleEmbeddingLookup class + * @ingroup    COM_AI_RUNTIME + */ + +/** + * @brief Class to run SimpleEmbeddingLookup Layer + */ class SimpleEmbeddingLookup : public ::arm_compute::IFunction { public: + SimpleEmbeddingLookup(void) : _lookups(nullptr), _values(nullptr), _output(nullptr) + { + // DO NOTHING + } + +public: + /** + * @brief Configure the layer + * @param[in] lookups 1D tensor which contains lookup values + * @param[in] values The source tensor + * @param[in] output The destination tensor + * @return N/A + */ void configure(::arm_compute::ITensor *lookups, ::arm_compute::ITensor *values, ::arm_compute::ITensor *output); + /** + * @brief Run the operation. Must be called after configure(). + * @return N/A + */ void run() override; private: diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.cc new file mode 100644 index 000000000..7f8ae2505 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.cc @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/layers/SimpleHashtableLookupLayer.h" + +#include <arm_compute/runtime/CL/CLScheduler.h> + +void SimpleHashtableLookupLayer::configure(::arm_compute::ITensor *lookups, + ::arm_compute::ITensor *keys, + ::arm_compute::ITensor *values, + ::arm_compute::ITensor *output, + ::arm_compute::ITensor *hits) +{ + _lookups = lookups; + _keys = keys; + _values = values; + _output = output; + _hits = hits; + _lookup_indices.resize(lookups->info()->dimension(0), -1); +} + +void SimpleHashtableLookupLayer::run() +{ + auto &queue = ::arm_compute::CLScheduler::get().queue(); + if (::internal::arm_compute::isGpuMode()) + { + CAST_CL(_lookups)->map(queue); + CAST_CL(_keys)->map(queue); + CAST_CL(_values)->map(queue); + CAST_CL(_output)->map(queue); + CAST_CL(_hits)->map(queue); + } + + const int32_t *lookups_buf = reinterpret_cast<int32_t *>(_lookups->buffer()); + const int32_t *keys_buf = reinterpret_cast<int32_t *>(_keys->buffer()); + uint8_t *hits_buf = reinterpret_cast<uint8_t *>(_hits->buffer()); + + const auto lookups_info = _lookups->info(); + const auto values_info = _values->info(); + const auto keys_info = _keys->info(); + const auto output_info = _output->info(); + + // NOTE The first dimension's position must be always at the end of dimensions. 
+  const auto first_dim_pos = values_info->num_dimensions() - 1;
+  const size_t first_dim = values_info->dimension(first_dim_pos);
+
+  std::map<int32_t, size_t> key_map;
+  const int keys_num = keys_info->dimension(0);
+  for (size_t key_index = 0; key_index < keys_num; key_index++)
+  {
+    key_map[keys_buf[key_index]] = key_index;
+  }
+
+  const int lookups_num = lookups_info->dimension(0);
+  for (size_t i = 0; i < lookups_num; ++i)
+  {
+    const auto lookup_value = lookups_buf[i];
+    const auto it = key_map.find(lookup_value);
+    if (it != key_map.end())
+    {
+      if (it->second >= first_dim)
+        throw std::runtime_error("HashTable Lookup: index out of bounds.");
+      _lookup_indices[i] = it->second;
+    }
+  }
+
+  // If each strides of values and output are different, applied padding size of the two tensors are
+  // different, therefore, it can not be copied at once.
+  auto can_copy_at_once = [&]() -> bool {
+    const auto &values_strides = values_info->strides_in_bytes();
+    const auto &output_strides = output_info->strides_in_bytes();
+
+    for (size_t i = 0; i < first_dim_pos; ++i)
+    {
+      if (values_strides[i] != output_strides[i])
+        return false;
+    }
+
+    return true;
+  };
+
+  using ::arm_compute::Window;
+  using ::arm_compute::Iterator;
+  using ::arm_compute::Coordinates;
+
+  size_t copy_bytes;
+  Window window;
+  if (can_copy_at_once())
+  {
+    copy_bytes = values_info->total_size() / first_dim;
+    window.use_tensor_dimensions(output_info->tensor_shape(), first_dim_pos);
+  }
+  else
+  {
+    copy_bytes = values_info->dimension(0) * values_info->element_size();
+    window.use_tensor_dimensions(output_info->tensor_shape(), Window::DimY);
+  }
+
+  Iterator it(_output, window);
+  execute_window_loop(window,
+                      [&](const Coordinates &id) {
+                        Coordinates values_id = id;
+                        const int idx = id[first_dim_pos];
+                        const int lookup_index = _lookup_indices[idx];
+                        if (lookup_index >= 0)
+                        {
+                          values_id.set(first_dim_pos, lookup_index);
+                          memcpy(it.ptr(), _values->ptr_to_element(values_id), copy_bytes);
+                          hits_buf[idx] = 1;
+                        }
+                        else
+                        {
+                          memset(it.ptr(), 0, copy_bytes);
+                          hits_buf[idx] = 0;
+                        }
+                      },
+                      it);
+
+  if (::internal::arm_compute::isGpuMode())
+  {
+    CAST_CL(_lookups)->unmap(queue);
+    CAST_CL(_keys)->unmap(queue);
+    CAST_CL(_values)->unmap(queue);
+    CAST_CL(_output)->unmap(queue);
+    CAST_CL(_hits)->unmap(queue);
+  }
+}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.h
new file mode 100644
index 000000000..ba9d2ec0d
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleHashtableLookupLayer.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef __SIMPLE_HASHTABLE_LOOKUP_H__ +#define __SIMPLE_HASHTABLE_LOOKUP_H__ + +#include "internal/arm_compute.h" + +class SimpleHashtableLookupLayer : public ::arm_compute::IFunction +{ +public: + SimpleHashtableLookupLayer(void) + : _lookups(nullptr), _keys(nullptr), _values(nullptr), _output(nullptr), _hits(nullptr) + { + // DO NOTHING + } + + void configure(::arm_compute::ITensor *lookups, ::arm_compute::ITensor *keys, + ::arm_compute::ITensor *values, ::arm_compute::ITensor *output, + ::arm_compute::ITensor *hits); + + void run() override; + +private: + ::arm_compute::ITensor *_lookups; + ::arm_compute::ITensor *_keys; + ::arm_compute::ITensor *_values; + ::arm_compute::ITensor *_output; + ::arm_compute::ITensor *_hits; + std::vector<int32_t> _lookup_indices; +}; + +#endif /*__SIMPLE_HASHTABLE_LOOKUP_H__ */ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.cc new file mode 100644 index 000000000..d3943ad40 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.cc @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "internal/layers/SimpleNeg.h" + +#include <arm_compute/runtime/CL/CLScheduler.h> + +void SimpleNeg::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output) +{ + _input = input; + _output = output; +} + +void SimpleNeg::run() +{ + auto &queue = ::arm_compute::CLScheduler::get().queue(); + if (::internal::arm_compute::isGpuMode()) + { + CAST_CL(_input)->map(queue); + CAST_CL(_output)->map(queue); + } + + arm_compute::Window window; + window.use_tensor_dimensions(_output->info()->tensor_shape()); + + execute_window_loop(window, [this](const arm_compute::Coordinates &id) { + // NOTE Must be two input tensors of identical type + // Must be output tensor of the same type as input0. + assert(_input->info()->data_type() == _output->info()->data_type()); + + switch (_input->info()->data_type()) + { + case ::arm_compute::DataType::F32: + { + const auto input_value = *reinterpret_cast<float *>(_input->ptr_to_element(id)); + *reinterpret_cast<float *>(_output->ptr_to_element(id)) = -input_value; + break; + } + case ::arm_compute::DataType::S32: + { + const auto input_value = *reinterpret_cast<int32_t *>(_input->ptr_to_element(id)); + *reinterpret_cast<int32_t *>(_output->ptr_to_element(id)) = -input_value; + break; + } + case ::arm_compute::DataType::U32: + { + const auto input_value = *reinterpret_cast<uint32_t *>(_input->ptr_to_element(id)); + *reinterpret_cast<uint32_t *>(_output->ptr_to_element(id)) = -input_value; + break; + } + default: + throw std::runtime_error("Not supported, yet"); + break; + } + }); + + if (::internal::arm_compute::isGpuMode()) + { + CAST_CL(_input)->unmap(queue); + CAST_CL(_output)->unmap(queue); + } +} diff --git a/runtimes/pure_arm_compute/src/internal/layers/PadLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.h index cb3f36337..4ca88e7f8 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/PadLayer.h +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleNeg.h @@ -1,41 +1,39 @@ -/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __PAD_LAYER_H__
-#define __PAD_LAYER_H__
-
-#include <arm_compute/runtime/CL/CLTensor.h>
-#include <arm_compute/runtime/CL/functions/CLFillBorder.h>
-
-class PadLayer : public ::arm_compute::IFunction
-{
-public:
- void configure(::arm_compute::ICLTensor *input, ::arm_compute::ICLTensor *output,
- unsigned int border_width);
- void run(void) override;
-
-private:
- ::arm_compute::ICLTensor *_input;
- ::arm_compute::ICLTensor *_output;
- int _border_width;
- int _output_height;
- int _output_width;
-
- ::arm_compute::CLFillBorder _fillborderkernel;
- void populateOutput();
-};
-
-#endif // __PAD_LAYER_H__
+/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __SIMPLE_NEG_H__ +#define __SIMPLE_NEG_H__ + +#include "internal/arm_compute.h" + +class SimpleNeg : public ::arm_compute::IFunction +{ +public: + SimpleNeg(void) : _input(nullptr), _output(nullptr) + { + // DO NOTHING + } + + void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output); + + void run() override; + +private: + ::arm_compute::ITensor *_input; + ::arm_compute::ITensor *_output; +}; + +#endif /*__SIMPLE_NEG_H__ */ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.cc new file mode 100644 index 000000000..2a0a25f0c --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.cc @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "internal/arm_compute.h" +#include "SimplePackLayer.h" + +void SimplePackLayer::configure(const std::vector<::arm_compute::ICLTensor *> &input_vector, + ::arm_compute::ICLTensor *output, int32_t axis) +{ + uint32_t nr_inputs = input_vector.size(); + uint32_t output_rank = output->info()->num_dimensions(); + const ::arm_compute::PermutationVector pv{1, 2, 0}; + _cl_permuted_vector.resize(nr_inputs); + _cl_permute_vector.resize(nr_inputs); + + _output = output; + // A negative axis implies axis from the end. + // For example, axis = -1 implies the first axis from the end, i.e. axis = Rank - 1. + // Similarly, axis = -2 imples second axis from the end, i.e. axis = Rank - 2. + if (axis < 0) + { + axis += output_rank; + } + _axis = ToARMComputeAxis(output_rank, axis).value(); + _cl_reshape_vector.resize(nr_inputs); + + ::arm_compute::TensorShape subTensor_shape{}; + for (int i = 0; i < output_rank; i++) + { + if (i != _axis) + { + subTensor_shape.set(i, _output->info()->tensor_shape()[i]); + } + else + { + subTensor_shape.set(i, 1); + } + } + + auto subTensor_offset = ::arm_compute::Coordinates{}; + subTensor_offset.set_num_dimensions(output_rank); + + for (int i = 0; i < input_vector.size(); i++) + { + _input_vector.push_back(input_vector[i]); + subTensor_offset[_axis] = i; + auto temp_tensor = std::make_shared<::arm_compute::CLSubTensor>( + CAST_CL(_output), subTensor_shape, subTensor_offset, true); + _sub_tensor_vector.push_back(temp_tensor); + // configure to resize of input tensor in sub tensor offseted, dimension expansion will be + // automatic + _cl_permute_vector[i].configure(CAST_CL(_input_vector[i]), &_cl_permuted_vector[i], pv); + _cl_reshape_vector[i].configure(&_cl_permuted_vector[i], _sub_tensor_vector[i].get()); + _cl_permuted_vector[i].allocator()->allocate(); + } +} + +void SimplePackLayer::run(void) +{ + for (int i = 0; i < 
_input_vector.size(); i++) + { + _cl_permute_vector[i].run(); + _cl_reshape_vector[i].run(); + } +} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.h new file mode 100644 index 000000000..2c2fc37f2 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimplePackLayer.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __SIMPLE_PACK_LAYER_H__ +#define __SIMPLE_PACK_LAYER_H__ + +#include <arm_compute/runtime/CL/CLTensor.h> +#include <arm_compute/runtime/CL/CLSubTensor.h> +#include <arm_compute/runtime/CL/functions/CLReshapeLayer.h> +#include <arm_compute/runtime/CL/functions/CLPermute.h> + +class SimplePackLayer : public ::arm_compute::IFunction +{ +public: + SimplePackLayer(void) + : _cl_permuted_vector{}, _input_vector{}, _sub_tensor_vector{}, _cl_reshape_vector{}, + _cl_permute_vector{}, _output(nullptr), _axis(0) + { + // DO NOTHING + } + +public: + void configure(const std::vector<::arm_compute::ICLTensor *> &input_vector, + ::arm_compute::ICLTensor *output, int axis); + +public: + void run(void) override; + +private: + std::vector<::arm_compute::CLTensor> _cl_permuted_vector; + std::vector<::arm_compute::ICLTensor *> _input_vector; + std::vector<std::shared_ptr<::arm_compute::CLSubTensor>> _sub_tensor_vector; + std::vector<::arm_compute::CLReshapeLayer> _cl_reshape_vector; + std::vector<::arm_compute::CLPermute> _cl_permute_vector; + ::arm_compute::ICLTensor *_output; + int _axis; +}; + +#endif // __SIMPLE_PACK_LAYER_H__ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.cc new file mode 100644 index 000000000..64236603f --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.cc @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/layers/SimplePadLayer.h" +#include <arm_compute/runtime/CL/CLScheduler.h> + +namespace +{ +bool validate_arg(const ::arm_compute::ITensor *input, const ::arm_compute::ITensor *output, + const ::arm_compute::ITensor *padding_size, + const ::arm_compute::Coordinates &axises) +{ + const int input_batch = input->info()->tensor_shape()[axises[0]]; + const int input_height = input->info()->tensor_shape()[axises[1]]; + const int input_width = input->info()->tensor_shape()[axises[2]]; + const int input_depth = input->info()->tensor_shape()[axises[3]]; + + const int output_batch = output->info()->tensor_shape()[axises[0]]; + const int output_height = output->info()->tensor_shape()[axises[1]]; + const int output_width = output->info()->tensor_shape()[axises[2]]; + const int output_depth = output->info()->tensor_shape()[axises[3]]; + + auto pad_batch_up = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 0})); + auto pad_batch_down = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({1, 0})); + auto pad_height_top = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 1})); + auto pad_height_bottom = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({1, 1})); + auto pad_width_left = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 2})); + auto pad_width_right = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({1, 2})); + auto pad_depth_front = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 3})); + auto pad_depth_back = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({1, 3})); + + const int padded_batch = input_batch + pad_batch_up + pad_batch_down; + const int padded_height = input_height + pad_height_top + pad_height_bottom; + const int padded_width = input_width + pad_width_left + pad_width_right; + const int 
padded_depth = input_depth + pad_depth_front + pad_depth_back; + + return (padded_batch == output_batch) && (padded_height == output_height) && + (padded_width == output_width) && (padded_depth == output_depth); +} +} // namespace + +void SimplePadLayer::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, + ::arm_compute::ITensor *padding_size, + const ::arm_compute::Coordinates &axises) +{ + + const auto rank = axises.num_dimensions(); + assert(rank == 4); + assert(input != nullptr && output != nullptr && padding_size != nullptr); + + for (int i = 0; i < rank; ++i) + { + assert(axises[i] >= 0); + assert(axises[i] < rank); + } + + _input = input; + _output = output; + _padding_size = padding_size; + _axises = axises; +} + +template <typename T> +inline void ApplyPadding(const ::arm_compute::ITensor *input_data, + const ::arm_compute::TensorShape &input_shape, + const ::arm_compute::ITensor *padding_size, + ::arm_compute::ITensor *output_data, + const ::arm_compute::TensorShape &output_shape, + const ::arm_compute::Coordinates &axises, T zero_value) +{ + + assert(validate_arg(input_data, output_data, padding_size, axises) && + "Padded Input shape does not match to output shape"); + + const int input_batch = input_shape[axises[0]]; + const int input_height = input_shape[axises[1]]; + const int input_width = input_shape[axises[2]]; + const int input_depth = input_shape[axises[3]]; + + const int output_batch = output_shape[axises[0]]; + const int output_height = output_shape[axises[1]]; + const int output_width = output_shape[axises[2]]; + const int output_depth = output_shape[axises[3]]; + + // Padding size for Up, Top, Left and Front are required. 
+ auto pad_batch_up = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 0})); + auto pad_height_top = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 1})); + auto pad_width_left = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 2})); + auto pad_depth_front = *reinterpret_cast<const int32_t *>(padding_size->ptr_to_element({0, 3})); + + for (int out_b = 0; out_b < output_batch; ++out_b) + { + for (int out_h = 0; out_h < output_height; ++out_h) + { + for (int out_w = 0; out_w < output_width; ++out_w) + { + for (int out_d = 0; out_d < output_depth; ++out_d) + { + auto output_id = asARMComputeCoordinates( + ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises); + + if (out_b < pad_batch_up || out_b >= (input_batch + pad_batch_up) || + out_h < pad_height_top || out_h >= (input_height + pad_height_top) || + out_w < pad_width_left || out_w >= (input_width + pad_width_left) || + out_d < pad_depth_front || out_d >= (input_depth + pad_depth_front)) + { + *reinterpret_cast<T *>(output_data->ptr_to_element(output_id)) = zero_value; + } + else + { + auto input_id = asARMComputeCoordinates( + ::arm_compute::Coordinates{out_b - pad_batch_up, out_h - pad_height_top, + out_w - pad_width_left, out_d - pad_depth_front}, + axises); + *reinterpret_cast<T *>(output_data->ptr_to_element(output_id)) = + *reinterpret_cast<T *>(input_data->ptr_to_element(input_id)); + } + } + } + } + } +} +void SimplePadLayer::run() +{ + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->map(q); + CAST_CL(_output)->map(q); + CAST_CL(_padding_size)->map(q); + } + + switch (_input->info()->data_type()) + { + case ::arm_compute::DataType::U8: + case ::arm_compute::DataType::QASYMM8: + ApplyPadding<uint8_t>(_input, _input->info()->tensor_shape(), _padding_size, _output, + _output->info()->tensor_shape(), _axises, + _input->info()->quantization_info().offset); + break; + case 
::arm_compute::DataType::F32: + ApplyPadding<float>(_input, _input->info()->tensor_shape(), _padding_size, _output, + _output->info()->tensor_shape(), _axises, 0.0f); + break; + default: + ARM_COMPUTE_ERROR("DataType not supported"); + break; + } + + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->unmap(q); + CAST_CL(_output)->unmap(q); + CAST_CL(_padding_size)->unmap(q); + } +} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.h new file mode 100644 index 000000000..8cb6659ce --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimplePadLayer.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __SIMPLE_PAD_LAYER_H__ +#define __SIMPLE_PAD_LAYER_H__ + +#include "internal/arm_compute.h" +#include "internal/arm_compute/Cast.h" + +class SimplePadLayer : public ::arm_compute::IFunction +{ +public: + SimplePadLayer(void) : _input(nullptr), _output(nullptr), _padding_size(nullptr), _axises{} + { + // DO NOTHING + } + + void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, + ::arm_compute::ITensor *padding_size, + const ::arm_compute::Coordinates &axises = getARMComputeAxises(4)); + + void run(void) override; + +private: + ::arm_compute::ITensor *_input; + ::arm_compute::ITensor *_output; + ::arm_compute::ITensor *_padding_size; + ::arm_compute::Coordinates _axises; +}; + +#endif // __SIMPLE_PAD_LAYER_H__ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.cc new file mode 100644 index 000000000..b5b3a0950 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.cc @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "internal/layers/SimpleSQRT.h"
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+void SimpleSQRT::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output)
+{
+  _input = input;
+  _output = output;
+}
+
+void SimpleSQRT::run()
+{
+  auto &queue = ::arm_compute::CLScheduler::get().queue();
+  if (::internal::arm_compute::isGpuMode())
+  {
+    CAST_CL(_input)->map(queue);
+    CAST_CL(_output)->map(queue);
+  }
+
+  arm_compute::Window window;
+  window.use_tensor_dimensions(_output->info()->tensor_shape());
+
+  execute_window_loop(window, [this](const arm_compute::Coordinates &id) {
+    // NOTE SQRT is a unary element-wise operation: a single input tensor and
+    // an output tensor of the same type as the input are required.
+    assert(_input->info()->data_type() == _output->info()->data_type());
+
+    const auto input_value = *reinterpret_cast<float *>(_input->ptr_to_element(id));
+    *reinterpret_cast<float *>(_output->ptr_to_element(id)) = sqrt(input_value);
+  });
+
+  if (::internal::arm_compute::isGpuMode())
+  {
+    CAST_CL(_input)->unmap(queue);
+    CAST_CL(_output)->unmap(queue);
+  }
+}
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.h
new file mode 100644
index 000000000..b05a9e32e
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSQRT.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __SIMPLE_SQRT_H__ +#define __SIMPLE_SQRT_H__ + +#include "internal/arm_compute.h" + +class SimpleSQRT : public ::arm_compute::IFunction +{ +public: + SimpleSQRT(void) : _input(nullptr), _output(nullptr) + { + // DO NOTHING + } + + void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output); + + void run() override; + +private: + ::arm_compute::ITensor *_input; + ::arm_compute::ITensor *_output; +}; + +#endif /*__SIMPLE_SQRT_H__ */ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc new file mode 100644 index 000000000..f53675b99 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "internal/layers/SimpleSpaceToBatchND.h" + +#include <arm_compute/runtime/CL/CLScheduler.h> + +void SimpleSpaceToBatchND::configure(::arm_compute::ITensor *input, + ::arm_compute::ITensor *block_size, + ::arm_compute::ITensor *padding_size, + ::arm_compute::ITensor *output) +{ + const auto rank = input->info()->num_dimensions(); + assert(rank == 4); + + _input = input; + _block_size = block_size; + _padding_size = padding_size; + _output = output; +} + +template <typename T> +inline void +SpaceToBatchND(const ::arm_compute::ITensor *input, const ::arm_compute::TensorShape &input_shape, + const ::arm_compute::ITensor *block_size, const ::arm_compute::ITensor *padding_size, + const ::arm_compute::ITensor *output, const ::arm_compute::TensorShape &output_shape, + T zero_value) +{ + const int input_batch = input_shape[3]; + const int input_height = input_shape[1]; + const int input_width = input_shape[0]; + + const int depth = output_shape[2]; + + const int padding_height_left = *reinterpret_cast<int *>(padding_size->ptr_to_element({0, 1})); + const int padding_height_right = *reinterpret_cast<int *>(padding_size->ptr_to_element({1, 1})); + const int padding_width_left = *reinterpret_cast<int *>(padding_size->ptr_to_element({0, 0})); + const int padding_width_right = *reinterpret_cast<int *>(padding_size->ptr_to_element({1, 0})); + const int padded_height = input_height + padding_height_left + padding_height_right; + const int padded_width = input_width + padding_width_left + padding_width_right; + + const int block_size_height = *reinterpret_cast<int *>(block_size->ptr_to_element({1})); + const int block_size_width = *reinterpret_cast<int *>(block_size->ptr_to_element({0})); + + assert(padding_height_left >= 0); + assert(padding_height_right >= 0); + assert(padding_width_left >= 0); + assert(padding_width_right >= 0); + assert(block_size_height >= 1); + assert(block_size_width >= 1); + assert(padded_height % block_size_height == 0); + 
assert(padded_width % block_size_width == 0); + assert(output->info()->dimension(3) == + input->info()->dimension(3) * (block_size_height * block_size_width)); + + for (int in_b = 0; in_b < input_batch; ++in_b) + { + for (int in_d = 0; in_d < depth; ++in_d) + { + for (int in_h = 0; in_h < padded_height; ++in_h) + { + for (int in_w = 0; in_w < padded_width; ++in_w) + { + const int out_d = in_d; + const int out_h = in_h / block_size_height; + const int out_w = in_w / block_size_width; + const int out_b = + in_b + + ((in_h % block_size_height) * block_size_width + in_w % block_size_width) * + input_batch; + + const ::arm_compute::Coordinates output_id{out_w, out_h, out_d, out_b}; + + if (in_h < padding_height_left || in_h >= (input_height + padding_height_left) || + in_w < padding_width_left || in_w >= (input_width + padding_width_left)) + { + *reinterpret_cast<T *>(output->ptr_to_element(output_id)) = zero_value; + } + else + { + const ::arm_compute::Coordinates input_id{in_w - padding_width_left, + in_h - padding_height_left, in_d, in_b}; + *reinterpret_cast<T *>(output->ptr_to_element(output_id)) = + *reinterpret_cast<T *>(input->ptr_to_element(input_id)); + } + } + } + } + } +} +void SimpleSpaceToBatchND::run() +{ + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->map(q); + CAST_CL(_block_size)->map(q); + CAST_CL(_padding_size)->map(q); + CAST_CL(_output)->map(q); + } + + switch (_input->info()->data_type()) + { + case ::arm_compute::DataType::U8: + case ::arm_compute::DataType::QASYMM8: + SpaceToBatchND<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _padding_size, + _output, _output->info()->tensor_shape(), + _input->info()->quantization_info().offset); + break; + case ::arm_compute::DataType::F32: + SpaceToBatchND<float>(_input, _input->info()->tensor_shape(), _block_size, _padding_size, + _output, _output->info()->tensor_shape(), 0.0f); + break; + default: + 
ARM_COMPUTE_ERROR("DataType not supported"); + break; + } + + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->unmap(q); + CAST_CL(_block_size)->unmap(q); + CAST_CL(_padding_size)->unmap(q); + CAST_CL(_output)->unmap(q); + } +} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.h new file mode 100644 index 000000000..4af961d34 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __SIMPLE_SPACE_TO_BATCHND_H__ +#define __SIMPLE_SPACE_TO_BATCHND_H__ + +#include "internal/arm_compute.h" + +class SimpleSpaceToBatchND : public ::arm_compute::IFunction +{ +public: + SimpleSpaceToBatchND(void) + : _input(nullptr), _block_size(nullptr), _padding_size(nullptr), _output(nullptr) + { + // DO NOTHING + } + + /** Initialise input and output + * + * @param[in] input First tensor input. + * @param[in] block_size Block size. + * @param[in] padding_size Padding size. + * @param[out] output Output tensor. 
+ */ + void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *block_size, + ::arm_compute::ITensor *padding_size, ::arm_compute::ITensor *output); + + void run() override; + +private: + ::arm_compute::ITensor *_input; + ::arm_compute::ITensor *_block_size; + ::arm_compute::ITensor *_padding_size; + ::arm_compute::ITensor *_output; +}; + +#endif /*__SIMPLE_SPACE_TO_BATCHND_H__ */ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc index 682295f81..3519da1f3 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc @@ -19,11 +19,8 @@ #include <arm_compute/runtime/CL/CLScheduler.h> void SimpleSpaceToDepth::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, - int32_t block_size, - const ::arm_compute::Coordinates &axises = {3, 1, 0, 2}) + int32_t block_size, const ::arm_compute::Coordinates &axises) { - assert(input->info()->num_dimensions() == 4); - assert(output->info()->num_dimensions() == 4); const auto rank = axises.num_dimensions(); assert(rank == 4); for (int i = 0; i < rank; ++i) @@ -38,26 +35,10 @@ void SimpleSpaceToDepth::configure(::arm_compute::ITensor *input, ::arm_compute: _axises = axises; } -inline int32_t Offset4D(const ::arm_compute::TensorShape &shape, int32_t b, int32_t h, int32_t w, - int32_t d, const ::arm_compute::Coordinates &axises) -{ - // b, h, w, d >= 0 - size_t indexes[4]; - indexes[axises[0]] = b; - indexes[axises[1]] = h; - indexes[axises[2]] = w; - indexes[axises[3]] = d; - - int32_t offset = indexes[3] * shape[2] * shape[1] * shape[0]; - offset += indexes[2] * shape[1] * shape[0]; - offset += indexes[1] * shape[0]; - offset += indexes[0]; - return offset; -} - template <typename T> -inline void SpaceToDepth(const T *input_data, const ::arm_compute::TensorShape &input_shape, - int32_t block_size, T 
*output_data, +inline void SpaceToDepth(const ::arm_compute::ITensor *input, + const ::arm_compute::TensorShape &input_shape, int32_t block_size, + ::arm_compute::ITensor *output, const ::arm_compute::TensorShape &output_shape, const ::arm_compute::Coordinates &axises) { @@ -66,16 +47,6 @@ inline void SpaceToDepth(const T *input_data, const ::arm_compute::TensorShape & const int input_width = input_shape[axises[2]]; const int input_depth = input_shape[axises[3]]; - const int output_batch = output_shape[axises[0]]; - const int output_height = output_shape[axises[1]]; - const int output_width = output_shape[axises[2]]; - const int output_depth = output_shape[axises[3]]; - - assert(input_batch == output_batch); - assert(input_height == output_height * block_size); - assert(input_width == output_width * block_size); - assert(input_depth * block_size * block_size == output_depth); - for (int in_b = 0; in_b < input_batch; ++in_b) { for (int in_h = 0; in_h < input_height; ++in_h) @@ -90,10 +61,13 @@ inline void SpaceToDepth(const T *input_data, const ::arm_compute::TensorShape & const int out_d = in_d + ((in_h % block_size) * block_size + in_w % block_size) * input_depth; - const int input_index = Offset4D(input_shape, in_b, in_h, in_w, in_d, axises); - const int output_index = Offset4D(output_shape, out_b, out_h, out_w, out_d, axises); + auto input_id = + asARMComputeCoordinates(::arm_compute::Coordinates{in_b, in_h, in_w, in_d}, axises); + auto output_id = asARMComputeCoordinates( + ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises); - output_data[output_index] = input_data[input_index]; + *reinterpret_cast<T *>(output->ptr_to_element(output_id)) = + *reinterpret_cast<T *>(input->ptr_to_element(input_id)); } } } @@ -110,35 +84,16 @@ void SimpleSpaceToDepth::run() CAST_CL(_output)->map(q); } - auto input_buf = _input->buffer(); - auto output_buf = _output->buffer(); switch (_input->info()->data_type()) { case ::arm_compute::DataType::U8: case 
::arm_compute::DataType::QASYMM8: - SpaceToDepth(reinterpret_cast<const uint8_t *>(input_buf), _input->info()->tensor_shape(), - _block_size, reinterpret_cast<uint8_t *>(output_buf), - _output->info()->tensor_shape(), _axises); - break; - case ::arm_compute::DataType::S8: - SpaceToDepth(reinterpret_cast<const int8_t *>(input_buf), _input->info()->tensor_shape(), - _block_size, reinterpret_cast<int8_t *>(output_buf), - _output->info()->tensor_shape(), _axises); - break; - case ::arm_compute::DataType::U32: - SpaceToDepth(reinterpret_cast<const uint32_t *>(input_buf), _input->info()->tensor_shape(), - _block_size, reinterpret_cast<uint32_t *>(output_buf), - _output->info()->tensor_shape(), _axises); - break; - case ::arm_compute::DataType::S32: - SpaceToDepth(reinterpret_cast<const int32_t *>(input_buf), _input->info()->tensor_shape(), - _block_size, reinterpret_cast<int32_t *>(output_buf), - _output->info()->tensor_shape(), _axises); + SpaceToDepth<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _output, + _output->info()->tensor_shape(), _axises); break; case ::arm_compute::DataType::F32: - SpaceToDepth(reinterpret_cast<const float *>(input_buf), _input->info()->tensor_shape(), - _block_size, reinterpret_cast<float *>(output_buf), - _output->info()->tensor_shape(), _axises); + SpaceToDepth<float>(_input, _input->info()->tensor_shape(), _block_size, _output, + _output->info()->tensor_shape(), _axises); break; default: ARM_COMPUTE_ERROR("DataType not supported"); diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h index f5e028b1c..9e87c364c 100644 --- a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h @@ -14,25 +14,44 @@ * limitations under the License. 
 */
 
+/**
+ * @file       SimpleSpaceToDepth.h
+ * @brief      This file contains SimpleSpaceToDepth class
+ * @ingroup    COM_AI_RUNTIME
+ */
+
 #ifndef __SIMPLE_SPACE_TO_DEPTH_H__
 #define __SIMPLE_SPACE_TO_DEPTH_H__
 
 #include "internal/arm_compute.h"
-#include <arm_compute/core/ITensor.h>
-#include <arm_compute/runtime/IFunction.h>
+#include "internal/arm_compute/Cast.h"
 
+/**
+ * @brief Class to run SimpleSpaceToDepth Layer
+ */
 class SimpleSpaceToDepth : public ::arm_compute::IFunction
 {
 public:
-  /** Initialise input and output
-   *
-   * @param[in]  input  First tensor input.
-   * @param[out] output Output tensor.
-   * @param[in]  block_size Block size.
+  SimpleSpaceToDepth(void) : _input(nullptr), _output(nullptr), _block_size(0), _axises{}
+  {
+    // DO NOTHING
+  }
+
+  /**
+   * @brief Configure the layer
+   * @param[in]  input      First tensor input.
+   * @param[out] output     Output tensor.
+   * @param[in]  block_size Block size.
+   * @param[in]  axises     Axises of rank 4
+   * @return N/A
    */
   void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, int32_t block_size,
-                 const ::arm_compute::Coordinates &axises);
+                 const ::arm_compute::Coordinates &axises = getARMComputeAxises(4));
 
+  /**
+   * @brief Run the operation. Must be called after configure().
+   * @return N/A
+   */
   void run() override;
 
 private:
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.cc
new file mode 100644
index 000000000..abc291289
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.cc
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/layers/SimpleTransposeConv.h" +#include <arm_compute/runtime/CL/CLScheduler.h> + +void SimpleTransposeConv::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *weights, + ::arm_compute::ITensor *output, + ::arm_compute::PadStrideInfo &tconv_info, + ::arm_compute::Coordinates axises) +{ + auto rank = axises.num_dimensions(); + + assert(rank == 4); + + _input = input; + _weights = weights; + _output = output; + _stride_width = tconv_info.stride().first; + _stride_height = tconv_info.stride().second; + _pad_width = tconv_info.pad_left(); + _pad_height = tconv_info.pad_top(); + _axises = axises; +} + +template <typename T> +inline void ApplyTransposeConv( + const ::arm_compute::TensorShape &input_shape, const ::arm_compute::ITensor *input_data, + const ::arm_compute::TensorShape &filter_shape, const ::arm_compute::ITensor *filter_data, + const ::arm_compute::TensorShape &output_shape, const ::arm_compute::ITensor *output_data, + const int32_t stride_width, const int32_t stride_height, const int32_t pad_width, + const int32_t pad_height, const ::arm_compute::Coordinates axises) +{ + const int batches = input_shape[axises[0]]; + const int input_height = input_shape[axises[1]]; + const int input_width = input_shape[axises[2]]; + const int input_depth = input_shape[axises[3]]; + + const int filter_height = filter_shape[axises[1]]; + const int filter_width = filter_shape[axises[2]]; + + const int output_height = output_shape[axises[1]]; + const int output_width = output_shape[axises[2]]; + const int output_depth = 
output_shape[axises[3]]; + + assert(batches == output_shape[axises[0]]); + assert(input_depth == filter_shape[axises[3]]); + assert(filter_shape[axises[0]] == output_depth); + + // Although transpose convolution simplifies to convolution with transposed + // weights for strides of 1, non-unitary striding complicates matters. To + // keep this reference implementation as clear as possible, we use a + // "scatter" access pattern, where we loop through all the input elements, + // computing their influence on the output, rather than looping through the + // output elements in the typical "gather" access pattern of a conv. We + // therefore must initialize the output array to zero. + + // Loop through input elements one at a time. + for (int batch = 0; batch < batches; ++batch) + { + for (int in_y = 0; in_y < input_height; ++in_y) + { + for (int in_x = 0; in_x < input_width; ++in_x) + { + for (int in_channel = 0; in_channel < input_depth; ++in_channel) + { + // Loop through the output elements it will influence + const int out_x_origin = (in_x * stride_width) - pad_width; + const int out_y_origin = (in_y * stride_height) - pad_height; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) + { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) + { + for (int out_channel = 0; out_channel < output_depth; ++out_channel) + { + // Compute output element location + const int out_x = out_x_origin + filter_x; + const int out_y = out_y_origin + filter_y; + // We cannot accumulate out of bounds + if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) && + (out_y < output_height)) + { + auto input_id = asARMComputeCoordinates( + ::arm_compute::Coordinates{batch, in_y, in_x, in_channel}, axises); + auto filter_id = asARMComputeCoordinates( + ::arm_compute::Coordinates{in_channel, filter_y, filter_x, out_channel}, + axises); + auto output_id = asARMComputeCoordinates( + ::arm_compute::Coordinates{batch, out_y, out_x, out_channel}, axises); + T input_value = 
*reinterpret_cast<T *>(input_data->ptr_to_element(input_id)); + T filter_value = *reinterpret_cast<T *>(filter_data->ptr_to_element(filter_id)); + *reinterpret_cast<T *>(output_data->ptr_to_element(output_id)) += + input_value * filter_value; + } + } + } + } + } + } + } + } +} + +void SimpleTransposeConv::run() +{ + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->map(q); + CAST_CL(_weights)->map(q); + CAST_CL(_output)->map(q); + } + + switch (_input->info()->data_type()) + { + case ::arm_compute::DataType::S32: + ApplyTransposeConv<int32_t>(_input->info()->tensor_shape(), _input, + _weights->info()->tensor_shape(), _weights, + _output->info()->tensor_shape(), _output, _stride_width, + _stride_height, _pad_width, _pad_height, _axises); + break; + case ::arm_compute::DataType::F32: + ApplyTransposeConv<float>(_input->info()->tensor_shape(), _input, + _weights->info()->tensor_shape(), _weights, + _output->info()->tensor_shape(), _output, _stride_width, + _stride_height, _pad_width, _pad_height, _axises); + break; + default: + ARM_COMPUTE_ERROR("DataType not supported"); + break; + } + + if (::internal::arm_compute::isGpuMode()) + { + auto &q = ::arm_compute::CLScheduler::get().queue(); + + CAST_CL(_input)->unmap(q); + CAST_CL(_weights)->unmap(q); + CAST_CL(_output)->unmap(q); + } +} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.h new file mode 100644 index 000000000..c5519828b --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleTransposeConv.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __TRANSPOSE_CONV_EX__
+#define __TRANSPOSE_CONV_EX__
+
+#include "internal/arm_compute.h"
+#include "internal/arm_compute/Cast.h"
+
+class SimpleTransposeConv : public ::arm_compute::IFunction
+{
+public:
+  SimpleTransposeConv()
+      : _input(nullptr), _weights(nullptr), _output(nullptr), _stride_width(0), _stride_height(0),
+        _pad_width(0), _pad_height(0)
+  {
+    // DO NOTHING
+  }
+
+  /** Initialise input and output
+   *
+   * @param[in]  input   First tensor input.
+   * @param[in]  weights Weights
+   * @param[out] output  Output tensor.
+   * @param[in]  tconv_info Contains padding and policies to be used in the deconvolution,
+   *             this is described in @ref PadStrideInfo.
+ * @param[in] axises Axises of rank 4 + */ + void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *weights, + ::arm_compute::ITensor *output, ::arm_compute::PadStrideInfo &tconv_info, + ::arm_compute::Coordinates axises = getARMComputeAxises(4)); + + void run() override; + +private: + ::arm_compute::ITensor *_input; + ::arm_compute::ITensor *_weights; + ::arm_compute::ITensor *_output; + int32_t _stride_width; + int32_t _stride_height; + int32_t _pad_width; + int32_t _pad_height; + ::arm_compute::Coordinates _axises; +}; + +#endif /*__TRANSPOSE_CONV_EX__ */ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.cc new file mode 100644 index 000000000..910595a44 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.cc @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "internal/arm_compute.h" +#include "SimpleUnpackLayer.h" + +void SimpleUnpackLayer::configure(::arm_compute::ICLTensor *input, + const std::vector<::arm_compute::ICLTensor *> &output_vector, + int32_t axis) +{ + uint32_t nr_outputs = output_vector.size(); + _cl_permuted_vector.resize(nr_outputs); + _cl_permute_vector.resize(nr_outputs); + uint32_t input_rank = input->info()->num_dimensions(); + const ::arm_compute::PermutationVector pv{2, 0, 1}; + _input = input; + // Negative axis is supported, -1 implies R-1 axis where R is input rank + if (axis < 0) + { + axis += input_rank; + } + _axis = ToARMComputeAxis(input_rank, axis).value(); + _cl_reshape_vector.resize(nr_outputs); + + ::arm_compute::TensorShape subTensor_shape{}; + for (int i = 0; i < input_rank; i++) + { + if (i != _axis) + { + subTensor_shape.set(i, _input->info()->tensor_shape()[i]); + } + else + { + subTensor_shape.set(i, 1); + } + } + + auto subTensor_offset = ::arm_compute::Coordinates{}; + subTensor_offset.set_num_dimensions(input_rank); + + for (int i = 0; i < output_vector.size(); i++) + { + _output_vector.push_back(output_vector[i]); + subTensor_offset[_axis] = i; + auto temp_tensor = std::make_shared<::arm_compute::CLSubTensor>( + CAST_CL(_input), subTensor_shape, subTensor_offset, true); + _sub_tensor_vector.push_back(temp_tensor); + // Copies into the subtensor + _cl_permute_vector[i].configure(_sub_tensor_vector[i].get(), &_cl_permuted_vector[i], pv); + _cl_reshape_vector[i].configure(&_cl_permuted_vector[i], CAST_CL(_output_vector[i])); + _cl_permuted_vector[i].allocator()->allocate(); + } +} + +void SimpleUnpackLayer::run(void) +{ + for (int i = 0; i < _output_vector.size(); i++) + { + _cl_permute_vector[i].run(); + _cl_reshape_vector[i].run(); + } +} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.h new file mode 100644 index 000000000..52fc7513d --- /dev/null +++ 
b/runtimes/pure_arm_compute/src/internal/layers/SimpleUnpackLayer.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __UNPACK_LAYER_H__ +#define __UNPACK_LAYER_H__ + +#include <arm_compute/runtime/CL/CLTensor.h> +#include <arm_compute/runtime/CL/CLSubTensor.h> +#include <arm_compute/runtime/CL/functions/CLReshapeLayer.h> +#include <arm_compute/runtime/CL/functions/CLPermute.h> + +class SimpleUnpackLayer : public ::arm_compute::IFunction +{ +public: + SimpleUnpackLayer(void) + : _cl_permuted_vector{}, _output_vector{}, _sub_tensor_vector{}, _cl_reshape_vector{}, + _cl_permute_vector{}, _input(nullptr), _axis(0) + { + // DO NOTHING + } + +public: + void configure(::arm_compute::ICLTensor *input, + const std::vector<::arm_compute::ICLTensor *> &output_vector, int32_t axis); + +public: + void run(void) override; + +private: + std::vector<::arm_compute::CLTensor> _cl_permuted_vector; + std::vector<::arm_compute::ICLTensor *> _output_vector; + std::vector<std::shared_ptr<::arm_compute::CLSubTensor>> _sub_tensor_vector; + std::vector<::arm_compute::CLReshapeLayer> _cl_reshape_vector; + std::vector<::arm_compute::CLPermute> _cl_permute_vector; + ::arm_compute::ICLTensor *_input; + int32_t _axis; +}; + +#endif // __UNPACK_LAYER_H__ diff --git a/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.cc 
b/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.cc deleted file mode 100644 index 3f988a819..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.cc +++ /dev/null @@ -1,40 +0,0 @@ -#include "SquaredDifferenceOperation.h" -#include "internal/arm_compute.h" - -void SquaredDifferenceOperation::configure(::arm_compute::ITensor *input1, - ::arm_compute::ITensor *input2, - ::arm_compute::ITensor *output, - ::arm_compute::ConvertPolicy ConvertPolicy, float scale, - ::arm_compute::RoundingPolicy RoundingPolicy) -{ - _input1 = input1; - _input2 = input2; - _output = output; - - if (::internal::arm_compute::isGpuMode()) - { - _cl_sub.configure(CAST_CL(input1), CAST_CL(input2), CAST_CL(output), ConvertPolicy); - _cl_mul.configure(CAST_CL(output), CAST_CL(output), CAST_CL(output), scale, ConvertPolicy, - RoundingPolicy); - } - else - { - _neon_sub.configure(CAST_NE(input1), CAST_NE(input2), CAST_NE(output), ConvertPolicy); - _neon_mul.configure(CAST_NE(output), CAST_NE(output), CAST_NE(output), scale, ConvertPolicy, - RoundingPolicy); - } -} - -void SquaredDifferenceOperation::run(void) -{ - if (::internal::arm_compute::isGpuMode()) - { - _cl_sub.run(); - _cl_mul.run(); - } - else - { - _neon_sub.run(); - _neon_mul.run(); - } -} diff --git a/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.h b/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.h deleted file mode 100644 index 3782c4e8c..000000000 --- a/runtimes/pure_arm_compute/src/internal/layers/SquaredDifferenceOperation.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef __SQUARED_DIFFERENCE_OPERATION_H__ -#define __SQUARED_DIFFERENCE_OPERATION_H__ - -#include <arm_compute/runtime/Tensor.h> -#include <arm_compute/runtime/CL/CLTensor.h> - -#include <arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h> -#include <arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h> -#include 
<arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h> -#include <arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h> - -class SquaredDifferenceOperation : public ::arm_compute::IFunction -{ -public: - void configure(::arm_compute::ITensor *input1, ::arm_compute::ITensor *input2, - ::arm_compute::ITensor *output, ::arm_compute::ConvertPolicy ConvertPolicy, - float scale, ::arm_compute::RoundingPolicy RoundingPolicy); - -public: - void run(void) override; - -private: - ::arm_compute::ITensor *_input1; - ::arm_compute::ITensor *_input2; - - ::arm_compute::ITensor *_output; - -private: - ::arm_compute::CLArithmeticSubtraction _cl_sub; - ::arm_compute::CLPixelWiseMultiplication _cl_mul; - - ::arm_compute::NEArithmeticSubtraction _neon_sub; - ::arm_compute::NEPixelWiseMultiplication _neon_mul; -}; -#endif // __SQUARED_DIFFERENCE_OPERATION_H__ diff --git a/runtimes/pure_arm_compute/src/internal/nnapi/feature/Reader.h b/runtimes/pure_arm_compute/src/internal/nnapi/feature/Reader.h index 764b9b13a..ac25692a1 100644 --- a/runtimes/pure_arm_compute/src/internal/nnapi/feature/Reader.h +++ b/runtimes/pure_arm_compute/src/internal/nnapi/feature/Reader.h @@ -14,12 +14,17 @@ * limitations under the License. 
*/ +/** + * @file Reader.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::nnapi::feature::Reader + */ #ifndef __INTERNAL_NNAPI_FEATURE_READER_H__ #define __INTERNAL_NNAPI_FEATURE_READER_H__ #include "internal/nnapi/feature/Utils.h" -#include "util/feature/Reader.h" +#include "misc/feature/Reader.h" namespace internal { @@ -28,20 +33,40 @@ namespace nnapi namespace feature { -template <typename T> class Reader final : public nnfw::util::feature::Reader<T> +/** + * @brief Class to support reading element in feature(3D, 4D) + */ +template <typename T> class Reader final : public nnfw::misc::feature::Reader<T> { public: + /** + * @brief Construct a new Reader object + * @param[in] shape Shape of feature + * @param[in] ptr Pointer to feature data + * @param[in] len Size of tensor (byte) + */ // NOTE The parameter len denotes the number of bytes. - Reader(const ::nnfw::util::feature::Shape &shape, const T *ptr, size_t len) + Reader(const ::nnfw::misc::feature::Shape &shape, const T *ptr, size_t len) : _shape{shape}, _ptr{ptr} { assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len); } public: - const nnfw::util::feature::Shape &shape(void) const { return _shape; } + /** + * @brief Get shape of feature + * @return Shape of feature + */ + const nnfw::misc::feature::Shape &shape(void) const { return _shape; } public: + /** + * @brief Get value of element using channel, row, and column index for 3D feature + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Value of element + */ T at(uint32_t ch, uint32_t row, uint32_t col) const override { uint32_t index = index_of(_shape, ch, row, col); @@ -51,6 +76,14 @@ public: return arr[index]; } + /** + * @brief Get value of element using batch, channel, row, and column index for 4D feature + * @param[in] batch Batch index + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Value of element + */ T 
at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override { uint32_t index = index_of(_shape, batch, ch, row, col); @@ -59,7 +92,7 @@ public: } private: - nnfw::util::feature::Shape _shape; + nnfw::misc::feature::Shape _shape; private: const T *_ptr; diff --git a/runtimes/pure_arm_compute/src/internal/nnapi/feature/Utils.h b/runtimes/pure_arm_compute/src/internal/nnapi/feature/Utils.h index a64ff5d63..ee59d217e 100644 --- a/runtimes/pure_arm_compute/src/internal/nnapi/feature/Utils.h +++ b/runtimes/pure_arm_compute/src/internal/nnapi/feature/Utils.h @@ -14,10 +14,15 @@ * limitations under the License. */ +/** + * @file Utils.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines utility functions used in internal::nnapi::feature namespace + */ #ifndef __INTERNAL_NNAPI_FEATURE_UTILS_H__ #define __INTERNAL_NNAPI_FEATURE_UTILS_H__ -#include "util/feature/Shape.h" +#include "misc/feature/Shape.h" namespace internal { @@ -26,7 +31,15 @@ namespace nnapi namespace feature { -inline uint32_t index_of(const ::nnfw::util::feature::Shape &shape, uint32_t ch, uint32_t row, +/** + * @brief Get position of element using channel, row, and column for 3D feature + * @param[in] shape Shape of feature + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Position of element + */ +inline uint32_t index_of(const ::nnfw::misc::feature::Shape &shape, uint32_t ch, uint32_t row, uint32_t col) { uint32_t res = 0; @@ -39,7 +52,16 @@ inline uint32_t index_of(const ::nnfw::util::feature::Shape &shape, uint32_t ch, return res; } -inline uint32_t index_of(const ::nnfw::util::feature::Shape &shape, uint32_t batch, uint32_t ch, +/** + * @brief Get position of element using batch, channel, row, and column for 4D feature + * @param[in] shape Shape of feature + * @param[in] batch Batch index + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Position of element + */ +inline 
uint32_t index_of(const ::nnfw::misc::feature::Shape &shape, uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { uint32_t res = 0; diff --git a/runtimes/pure_arm_compute/src/internal/nnapi/feature/View.h b/runtimes/pure_arm_compute/src/internal/nnapi/feature/View.h index 083b6b055..965e42f1c 100644 --- a/runtimes/pure_arm_compute/src/internal/nnapi/feature/View.h +++ b/runtimes/pure_arm_compute/src/internal/nnapi/feature/View.h @@ -14,12 +14,17 @@ * limitations under the License. */ +/** + * @file View.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::nnapi::feature::View class + */ #ifndef __INTERNAL_NNAPI_FEATURE_VIEW_H__ #define __INTERNAL_NNAPI_FEATURE_VIEW_H__ #include "internal/nnapi/feature/Utils.h" -#include "util/feature/Reader.h" +#include "misc/feature/Reader.h" namespace internal { @@ -28,25 +33,55 @@ namespace nnapi namespace feature { -template <typename T> class View final : public nnfw::util::feature::Reader<T> +/** + * @brief Class to access feature's element information using index + */ +template <typename T> class View final : public nnfw::misc::feature::Reader<T> { public: + /** + * @brief Construct a new View object + * @param[in] shape Shape of feature + * @param[in] ptr Pointer to feature data + * @param[in] len Size of feature (byte) + * @return + */ // NOTE The parameter len denotes the number of bytes. 
- View(const ::nnfw::util::feature::Shape &shape, T *ptr, size_t len) : _shape{shape}, _ptr{ptr} + View(const ::nnfw::misc::feature::Shape &shape, T *ptr, size_t len) : _shape{shape}, _ptr{ptr} { assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len); } public: - const nnfw::util::feature::Shape &shape(void) const { return _shape; } + /** + * @brief Get shape of feature + * @return Shape of feature + */ + const nnfw::misc::feature::Shape &shape(void) const { return _shape; } public: + /** + * @brief Get value of element in 3D feature using channel, row, and column index + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Value of element + */ T at(uint32_t ch, uint32_t row, uint32_t col) const override { uint32_t index = index_of(_shape, ch, row, col); return _ptr[index]; } + + /** + * @brief Get value of element in 4D feature using batch, channel, row and column index + * @param[in] batch Batch index + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Value of element + */ T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const override { uint32_t index = index_of(_shape, batch, ch, row, col); @@ -54,12 +89,28 @@ public: return _ptr[index]; } + /** + * @brief Get reference of element in 3D feature using channel, row, and column index + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Reference of element + */ T &at(uint32_t ch, uint32_t row, uint32_t col) { uint32_t index = index_of(_shape, ch, row, col); return _ptr[index]; } + + /** + * @brief Get reference of element in 4D feature using batch, channel, row and column index + * @param[in] batch Batch index + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Reference of element + */ T &at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) { uint32_t index = index_of(_shape, 
batch, ch, row, col); @@ -68,7 +119,7 @@ public: } private: - nnfw::util::feature::Shape _shape; + nnfw::misc::feature::Shape _shape; private: T *_ptr; diff --git a/runtimes/pure_arm_compute/src/internal/nnapi/kernel/Reader.h b/runtimes/pure_arm_compute/src/internal/nnapi/kernel/Reader.h index 0853a8c89..ae964f74c 100644 --- a/runtimes/pure_arm_compute/src/internal/nnapi/kernel/Reader.h +++ b/runtimes/pure_arm_compute/src/internal/nnapi/kernel/Reader.h @@ -14,11 +14,16 @@ * limitations under the License. */ +/** + * @file Reader.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::nnapi::kernel::Reader class + */ #ifndef __INTERNAL_NNAPI_KERNEL_READER_H__ #define __INTERNAL_NNAPI_KERNEL_READER_H__ -#include "util/kernel/Shape.h" -#include "util/kernel/Reader.h" +#include "misc/kernel/Shape.h" +#include "misc/kernel/Reader.h" namespace internal { @@ -27,20 +32,41 @@ namespace nnapi namespace kernel { -template <typename T> class Reader final : public nnfw::util::kernel::Reader<T> +/** + * @brief Class to support reading element in kernel + */ +template <typename T> class Reader final : public nnfw::misc::kernel::Reader<T> { public: + /** + * @brief Construct a new Reader object + * @param[in] shape Shape of kernel + * @param[in] ptr Pointer to kernel data + * @param[in] len Size of kernel (byte) + */ // NOTE The parameter len denotes the number of bytes. 
- Reader(const ::nnfw::util::kernel::Shape &shape, const T *ptr, size_t len) + Reader(const ::nnfw::misc::kernel::Shape &shape, const T *ptr, size_t len) : _shape{shape}, _ptr{ptr} { assert(shape.N * shape.C * shape.H * shape.W * sizeof(T) == len); } public: - const nnfw::util::kernel::Shape &shape(void) const { return _shape; } + /** + * @brief Get shape of kernel + * @return Shape of kernel + */ + const nnfw::misc::kernel::Shape &shape(void) const { return _shape; } public: + /** + * @brief Get value of element for kernel + * @param[in] nth Kernel index + * @param[in] ch Channel index + * @param[in] row Row index + * @param[in] col Column index + * @return Value of element + */ T at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const override { // NNAPI uses NHWC ordering @@ -55,7 +81,7 @@ public: } private: - nnfw::util::kernel::Shape _shape; + nnfw::misc::kernel::Shape _shape; private: const T *_ptr; diff --git a/runtimes/pure_arm_compute/src/internal/nnapi/matrix/Reader.h b/runtimes/pure_arm_compute/src/internal/nnapi/matrix/Reader.h index f6f0f3908..f03a4be31 100644 --- a/runtimes/pure_arm_compute/src/internal/nnapi/matrix/Reader.h +++ b/runtimes/pure_arm_compute/src/internal/nnapi/matrix/Reader.h @@ -14,11 +14,16 @@ * limitations under the License. 
*/ +/** + * @file Reader.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::nnapi::matrix::Reader class + */ #ifndef __INTERNAL_NNAPI_MATRIX_READER_H__ #define __INTERNAL_NNAPI_MATRIX_READER_H__ -#include "util/matrix/Shape.h" -#include "util/matrix/Reader.h" +#include "misc/matrix/Shape.h" +#include "misc/matrix/Reader.h" namespace internal { @@ -27,20 +32,39 @@ namespace nnapi namespace matrix { -template <typename T> class Reader final : public nnfw::util::matrix::Reader<T> +/** + * @brief Class to support reading element in matrix + */ +template <typename T> class Reader final : public nnfw::misc::matrix::Reader<T> { public: + /** + * @brief Construct a new Reader object + * @param[in] shape Shape of matrix + * @param[in] ptr Pointer to matrix data + * @param[in] len Size of matrix (byte) + */ // NOTE The parameter len denotes the number of bytes. - Reader(const ::nnfw::util::matrix::Shape &shape, const T *ptr, size_t len) + Reader(const ::nnfw::misc::matrix::Shape &shape, const T *ptr, size_t len) : _shape{shape}, _ptr{ptr} { assert(shape.H * shape.W * sizeof(T) == len); } public: - const nnfw::util::matrix::Shape &shape(void) const { return _shape; } + /** + * @brief Get shape of matrix + * @return Shape of matrix + */ + const nnfw::misc::matrix::Shape &shape(void) const { return _shape; } public: + /** + * @brief Get value of element for matrix + * @param[in] row Row index + * @param[in] col Column index + * @return Value of element + */ T at(uint32_t row, uint32_t col) const override { // NNAPI uses NHWC ordering @@ -53,7 +77,7 @@ public: } private: - nnfw::util::matrix::Shape _shape; + nnfw::misc::matrix::Shape _shape; private: const T *_ptr; diff --git a/runtimes/pure_arm_compute/src/internal/nnapi/tensor/ConstView.h b/runtimes/pure_arm_compute/src/internal/nnapi/tensor/ConstView.h index 38d1b291b..6a3fff646 100644 --- a/runtimes/pure_arm_compute/src/internal/nnapi/tensor/ConstView.h +++ 
b/runtimes/pure_arm_compute/src/internal/nnapi/tensor/ConstView.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file       ConstView.h + * @brief      This file contains ConstView class + * @ingroup    COM_AI_RUNTIME + */ + #ifndef __INTERNAL_NNAPI_TENSOR_CONST_VIEW_H__ #define __INTERNAL_NNAPI_TENSOR_CONST_VIEW_H__ @@ -27,21 +33,36 @@ namespace nnapi namespace tensor { +/** + * @brief Wrapper class to read tensor values + * @tparam T The tensor element type + */ template <typename T> class ConstView { public: - ConstView(const ::nnfw::util::tensor::Shape &shape, const uint8_t *ptr, size_t len) + /** + * @brief Construct a ConstView class + * @param[in] shape Tensor shape + * @param[in] ptr The base pointer of actual data + * @param[in] len The number of bytes + */ + ConstView(const ::nnfw::misc::tensor::Shape &shape, const uint8_t *ptr, size_t len) : _shape{shape}, _ptr{ptr}, _len{len} { // DO NOTHING } public: - const nnfw::util::tensor::Shape &shape(void) const { return _shape; } + const nnfw::misc::tensor::Shape &shape(void) const { return _shape; } private: // TODO Make this as a helper function, and share it for both View<T> and ConstView<T> - uint32_t offset_of(const nnfw::util::tensor::Index &index) const + /** + * @brief Calculate offset for the given tensor index + * @param[in] index Tensor index + * @return The calculated offset + */ + uint32_t offset_of(const nnfw::misc::tensor::Index &index) const { if (_shape.rank() == 0) { @@ -61,7 +82,12 @@ private: } public: - T at(const nnfw::util::tensor::Index &index) const + /** + * @brief Get the value on the given index + * @param[in] index Flattened tensor index + * @return The value on the given index + */ + T at(const nnfw::misc::tensor::Index &index) const { const auto offset = offset_of(index); @@ -71,7 +97,7 @@ public: } private: - const nnfw::util::tensor::Shape _shape; + const nnfw::misc::tensor::Shape _shape; private: const uint8_t *const _ptr; diff --git 
a/runtimes/pure_arm_compute/src/internal/nnapi/tensor/Reader.h b/runtimes/pure_arm_compute/src/internal/nnapi/tensor/Reader.h index fe89e572e..cc51db594 100644 --- a/runtimes/pure_arm_compute/src/internal/nnapi/tensor/Reader.h +++ b/runtimes/pure_arm_compute/src/internal/nnapi/tensor/Reader.h @@ -14,11 +14,17 @@ * limitations under the License. */ +/** + * @file       Reader.h + * @brief      This file contains Reader class + * @ingroup    COM_AI_RUNTIME + */ + #ifndef __INTERNAL_NNAPI_TENSOR_READER_H__ #define __INTERNAL_NNAPI_TENSOR_READER_H__ #include <vector> -#include "util/tensor/Reader.h" +#include "misc/tensor/Reader.h" namespace internal { @@ -27,11 +33,20 @@ namespace nnapi namespace tensor { -template <typename T> class Reader final : public nnfw::util::tensor::Reader<T> +/** + * @brief Wrapper class to read tensor values + * @tparam T The tensor element type + */ +template <typename T> class Reader final : public nnfw::misc::tensor::Reader<T> { public: - // NOTE The parameter len denotes the number of bytes. 
- Reader(const ::nnfw::util::tensor::Shape &shape, const T *ptr, size_t len) + /** + * @brief Construct a Reader class + * @param[in] shape Tensor shape + * @param[in] ptr The base pointer of actual data + * @param[in] len The number of bytes + */ + Reader(const ::nnfw::misc::tensor::Shape &shape, const T *ptr, size_t len) : _shape{shape}, _ptr{ptr} { assert(shape.element_nums() * sizeof(T) == len); @@ -39,10 +54,19 @@ public: } public: - const nnfw::util::tensor::Shape &shape(void) const { return _shape; } + /** + * @brief Get shape object + * @return The shape as const reference + */ + const nnfw::misc::tensor::Shape &shape(void) const { return _shape; } public: - T at(const nnfw::util::tensor::Index &index_nnapi) const override + /** + * @brief Get the value on the given index + * @param[in] index_nnapi Flattened tensor index + * @return The value on the given index + */ + T at(const nnfw::misc::tensor::Index &index_nnapi) const override { uint32_t offset = 0; @@ -53,17 +77,19 @@ public: } private: - /* - Assuming that shape is [d4, .. , d1] and data is stored at a pointer ptr, - we need to calculate the offset of index [i4, .. i1] as follows: - offset = i4 * (d3 * d2 * d1) + - i3 * (d2 * d1) + - i2 * (d1) + - i1 - So (d4 * d3 * d2 * d1) or (d3 * d2 * d1) or (d2 * d1) happens whenever offset is calculate. - To minimize this repetitive calculation, - _stridess[n] contains _spape[n-1]*_spape[n-2]*_spape[0] - */ + /** + * @brief Initializes @c _stridess + * @return N/A + * @note Assuming that shape is [d4, .. , d1] and data is stored at a pointer ptr, + we need to calculate the offset of index [i4, .. i1] as follows: + offset = i4 * (d3 * d2 * d1) + + i3 * (d2 * d1) + + i2 * (d1) + + i1 + So (d4 * d3 * d2 * d1) or (d3 * d2 * d1) or (d2 * d1) happens whenever offset is + calculate. 
To minimize this repetitive calculation, + _stridess[n] contains _shape[n-1]*_shape[n-2]*_shape[0] + */ void initialize(void) { for (int r = 0; r < _shape.rank(); r++) @@ -76,7 +102,7 @@ private: - nnfw::util::tensor::Shape _shape; + nnfw::misc::tensor::Shape _shape; private: const T *_ptr; diff --git a/runtimes/pure_arm_compute/src/internal/nnapi/tensor/View.h b/runtimes/pure_arm_compute/src/internal/nnapi/tensor/View.h index 80e1bb057..f8f297f97 100644 --- a/runtimes/pure_arm_compute/src/internal/nnapi/tensor/View.h +++ b/runtimes/pure_arm_compute/src/internal/nnapi/tensor/View.h @@ -14,11 +14,16 @@ * limitations under the License. */ +/** + * @file View.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::nnapi::tensor::View class + */ #ifndef __INTERNAL_NNAPI_TENSOR_VIEW_H__ #define __INTERNAL_NNAPI_TENSOR_VIEW_H__ -#include "util/tensor/Shape.h" -#include "util/tensor/Index.h" +#include "misc/tensor/Shape.h" +#include "misc/tensor/Index.h" namespace internal { @@ -27,20 +32,38 @@ namespace tensor { +/** + * @brief Class to access tensor's element information using index + */ template <typename T> class View { public: + /** + * @brief Construct a new View object + * @param[in] shape Shape of tensor + * @param[in] ptr Pointer to tensor data + * @param[in] len Size of tensor (byte) + */ // NOTE The parameter len denotes the number of bytes. 
- View(const ::nnfw::util::tensor::Shape &shape, T *ptr, size_t len) : _shape{shape}, _ptr{ptr} + View(const ::nnfw::misc::tensor::Shape &shape, T *ptr, size_t len) : _shape{shape}, _ptr{ptr} { assert(shape.element_nums() * sizeof(T) == len); } public: - const nnfw::util::tensor::Shape &shape(void) const { return _shape; } + /** + * @brief Get shape of tensor + * @return Shape of tensor + */ + const nnfw::misc::tensor::Shape &shape(void) const { return _shape; } private: - uint32_t offset_of(const nnfw::util::tensor::Index &index) const + /** + * @brief Get position of element using index in tensor + * @param[in] index Index of element + * @return Position of element + */ + uint32_t offset_of(const nnfw::misc::tensor::Index &index) const { if (_shape.rank() == 0) { @@ -60,14 +83,24 @@ private: } public: - T at(const nnfw::util::tensor::Index &index) const + /** + * @brief Get value of element at index + * @param[in] index Index of element + * @return Value of element at index + */ + T at(const nnfw::misc::tensor::Index &index) const { const auto offset = offset_of(index); return _ptr[offset]; } - T &at(const nnfw::util::tensor::Index &index) + /** + * @brief Get reference of element at index + * @param[in] index Index of element + * @return Reference of element at index + */ + T &at(const nnfw::misc::tensor::Index &index) { const auto offset = offset_of(index); @@ -75,7 +108,7 @@ public: } private: - nnfw::util::tensor::Shape _shape; + nnfw::misc::tensor::Shape _shape; private: T *_ptr; diff --git a/runtimes/pure_arm_compute/src/internal/op/Abs.cc b/runtimes/pure_arm_compute/src/internal/op/Abs.cc new file mode 100644 index 000000000..e23a9538c --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/Abs.cc @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/op/Abs.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Abs +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace Abs +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Abs +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 1 && outputCount == 1); + + output_index = outputs[0]; + input_index = inputs[0]; +} + +} // namespace Abs +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/Abs.h b/runtimes/pure_arm_compute/src/internal/op/Abs.h new file mode 100644 index 000000000..0be8b0205 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/Abs.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __INTERNAL_OP_ABS_H__ +#define __INTERNAL_OP_ABS_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Abs +{ + +struct Param +{ + int32_t output_index; + int32_t input_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace Abs +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_ABS_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/Add.h b/runtimes/pure_arm_compute/src/internal/op/Add.h index 42ed5b976..a7804a569 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Add.h +++ b/runtimes/pure_arm_compute/src/internal/op/Add.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file Add.h + * @brief This file contains accept function and params for Add operation + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_ADD_H__ #define __INTERNAL_OP_ADD_H__ @@ -30,33 +36,66 @@ namespace op namespace Add { +/** + * @brief Struct of Add operation's param + */ struct Param { - int32_t ofm_index; + int32_t ofm_index; /**< Output format index */ - int32_t lhs_index; - int32_t rhs_index; - int32_t activation_index; + int32_t lhs_index; /**< Left hand side index */ + int32_t rhs_index; /**< Right hand side index */ + int32_t activation_index; /**< Activation index */ + /** + * @brief Construct a new Param object for Add as default + */ Param() = default; + + /** + * @brief Construct a new Param object for Add with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for Add + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for Add with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy the Node object for Add + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for Add + * @return Parameters of Add + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Function for accepting node for Add + * @param [in] v Node visitor for invoking visit function of Add + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/ArgMax.cc b/runtimes/pure_arm_compute/src/internal/op/ArgMax.cc new file mode 100644 index 000000000..485430377 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/ArgMax.cc @@ -0,0 
+1,64 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "internal/op/ArgMax.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace ArgMax +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace ArgMax +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace ArgMax +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 2 && outputCount == 1); + + ofm_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + // 1 -> Axis Tensor Index + ifm_index = inputs[0]; + axis_index = inputs[1]; +} + +} // namespace ArgMax +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/ArgMax.h b/runtimes/pure_arm_compute/src/internal/op/ArgMax.h new file mode 100644 index 000000000..780af2232 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/ArgMax.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __INTERNAL_OP_ARGMAX_H__ +#define __INTERNAL_OP_ARGMAX_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace ArgMax +{ + +struct Param +{ + int32_t ofm_index; + + int32_t ifm_index; + int32_t axis_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace ArgMax +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_ARGMAX_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/AvgPool2D.h b/runtimes/pure_arm_compute/src/internal/op/AvgPool2D.h index 729f6043c..cf9061ca9 100644 --- a/runtimes/pure_arm_compute/src/internal/op/AvgPool2D.h +++ b/runtimes/pure_arm_compute/src/internal/op/AvgPool2D.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file AvgPool2D.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::AvgPool2D Param structs + * and internal::tflite::op::AvgPool2D Node classes + */ #ifndef __INTERNAL_OP_AVG_POOL_2D_H__ #define __INTERNAL_OP_AVG_POOL_2D_H__ @@ -32,44 +38,75 @@ namespace AvgPool2D namespace Explicit { +/** + * @brief Struct to have indexes for operation parameter + */ struct Param { - int32_t ofm_index; + int32_t ofm_index; /**< Index of output feature map */ - int32_t ifm_index; + int32_t ifm_index; /**< Index of input feature map */ - int32_t kw_index; - int32_t kh_index; + int32_t kw_index; /**< Index of kernel width */ + int32_t kh_index; /**< Index of kernel height */ - int32_t hstride_index; - int32_t vstride_index; + int32_t hstride_index; /**< Index of horizontal stride */ + int32_t vstride_index; /**< Index of vertical stride */ - int32_t padding_left_index; - int32_t padding_right_index; - int32_t padding_top_index; - int32_t padding_bottom_index; - - int32_t activation_index; + int32_t padding_left_index; /**< Index of padding left */ + int32_t padding_right_index; /**< Index of padding right */ + int32_t padding_top_index; /**< Index of padding top */ + int32_t padding_bottom_index; /**< Index of padding bottom */ + int32_t activation_index; /**< Index of activation */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an operation of data structure + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO 
NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @param[in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: @@ -81,40 +118,71 @@ private: namespace Implicit { +/** + * @brief Struct to have indexes for operation parameter + */ struct Param { - int32_t ofm_index; - - int32_t ifm_index; + int32_t ofm_index; /**< Index of output feature map */ - int32_t kw_index; - int32_t kh_index; + int32_t ifm_index; /**< Index of input feature map */ - int32_t hstride_index; - int32_t vstride_index; + int32_t kw_index; /**< Index of kernel width */ + int32_t kh_index; /**< Index of kernel height */ - int32_t padding_index; - int32_t activation_index; + int32_t hstride_index; /**< Index of horizontal stride */ + int32_t vstride_index; /**< Index of vertical stride */ + int32_t padding_index; /**< Index of padding */ + int32_t activation_index; /**< Index of activation */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an operation of data structure + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param 
object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @param[in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/BatchToSpaceNd.cc b/runtimes/pure_arm_compute/src/internal/op/BatchToSpaceNd.cc new file mode 100644 index 000000000..0768039d0 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/BatchToSpaceNd.cc @@ -0,0 +1,63 @@ +/*Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "internal/op/BatchToSpaceNd.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace BatchToSpaceNd +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace BatchToSpaceNd +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace BatchToSpaceNd +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 2 && outputCount == 1); + + output_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + // 1 -> Block size Index + input_index = inputs[0]; + block_size_index = inputs[1]; +} + +} // namespace BatchToSpaceNd +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/BatchToSpaceNd.h b/runtimes/pure_arm_compute/src/internal/op/BatchToSpaceNd.h new file mode 100644 index 000000000..a514cb44c --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/BatchToSpaceNd.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __INTERNAL_OP_BATCHTOSPACE_ND_H__ +#define __INTERNAL_OP_BATCHTOSPACE_ND_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace BatchToSpaceNd +{ + +struct Param +{ + int32_t output_index; + + int32_t input_index; + int32_t block_size_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +} // namespace BatchToSpaceNd +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace BatchToSpaceNd +{ +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace BatchToSpaceNd +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_BATCHTOSPACE_Nd_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/Cast.h b/runtimes/pure_arm_compute/src/internal/op/Cast.h index 3b3795189..8af741a16 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Cast.h +++ b/runtimes/pure_arm_compute/src/internal/op/Cast.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file Cast.h + * @brief This file contains accept function and params for Cast operation + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_CAST_H__ #define __INTERNAL_OP_CAST_H__ @@ -30,31 +36,64 @@ namespace op namespace Cast { +/** + * @brief Struct of Cast operation's param + */ struct Param { - int32_t output_index; + int32_t output_index; /**< Output index */ - int32_t input_index; + int32_t input_index; /**< Input index */ + /** + * @brief Construct a new Param object for Cast as default + */ Param() = default; + + /** + * @brief Construct a new Param object for Cast with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for Cast + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for Cast with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy the Node object for Cast + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for Cast + * @return Parameters of Cast + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Function for accepting node for Cast + * @param [in] v Node visitor for invoking visit function of Cast + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/Concat.h b/runtimes/pure_arm_compute/src/internal/op/Concat.h index 185cba3e1..207f964fb 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Concat.h +++ b/runtimes/pure_arm_compute/src/internal/op/Concat.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file Concat.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines Concat node + */ + #ifndef __INTERNAL_OP_CONCAT_H__ #define __INTERNAL_OP_CONCAT_H__ @@ -31,36 +37,68 @@ namespace op namespace Concat { +/** + * @brief Struct to manipulate parameter for Concat operation + */ struct Param { - int32_t ofm_index; + int32_t ofm_index; //!< index for output - std::vector<int32_t> ifm_indexes; - int32_t axis_index; + std::vector<int32_t> ifm_indexes; //!< index for input + int32_t axis_index; //!< index for axis + /** + * @brief Default Constructor + */ Param() = default; + /** + * @brief Construct a new Param object + * @param[in] inputCount the number of inputs + * @param[in] inputs pointer for input data + * @param[in] outputCount the number of outputs + * @param[in] outputs pointer for input data + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define Concat Operation + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Concat Node object + * @param param Parameter for Concat Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Default Destructor + */ virtual ~Node() = default; public: + /** + * @brief Get parameter + * @return Param reference + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Accept a NodeVisitor so that it can visit this node + * @param [in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: - const Param _param; + const Param _param; //!< parameter for Concat node }; } // namespace Concat diff --git a/runtimes/pure_arm_compute/src/internal/op/Conv2D.h b/runtimes/pure_arm_compute/src/internal/op/Conv2D.h index b04b8c85f..de46fbb9c 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Conv2D.h +++ b/runtimes/pure_arm_compute/src/internal/op/Conv2D.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file Conv2D.h + * @brief This file contains accept function and params for Conv2D operation + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_CONV_2D_H__ #define __INTERNAL_OP_CONV_2D_H__ @@ -32,43 +38,76 @@ namespace Conv2D namespace Explicit { +/** + * @brief Struct of Conv2D(explicit) operation's param + */ struct Param { - int32_t ofm_index; + int32_t ofm_index; /**< Output format index */ - int32_t ifm_index; - int32_t ker_index; - int32_t bias_index; + int32_t ifm_index; /**< Input format index */ + int32_t ker_index; /**< Kernel index */ + int32_t bias_index; /**< Bias index */ - int32_t hstride_index; - int32_t vstride_index; + int32_t hstride_index; /**< Horizontal stride index */ + int32_t vstride_index; /**< Vertical stride index */ - int32_t padding_left_index; - int32_t padding_right_index; - int32_t padding_top_index; - int32_t padding_bottom_index; + int32_t padding_left_index; /**< Left padding index */ + int32_t padding_right_index; /**< Right padding index */ + int32_t padding_top_index; /**< Top padding index */ + int32_t padding_bottom_index; /**< Bottomd padding index */ - int32_t activation_index; + int32_t activation_index; /**< Activation index */ + /** + * @brief Construct a new Param object for Conv2D(explicit) as default + */ Param() = default; + + /** + * @brief Construct a new Param object for Conv2D(explicit) with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for Conv2D(explicit) + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for conv2D(explicit) with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + 
/** + * @brief Destroy the Node object for conv2D(explicit) + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for conv2D(explicit) + * @return Parameters of conv2D(explicit) + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Function for accepting node for conv2D(explicit) + * @param [in] v Node visitor for invoking visit function of conv2D(explicit) + * @return N/A + */ void accept(NodeVisitor &&) const override; private: @@ -80,39 +119,72 @@ private: namespace Implicit { +/** + * @brief Struct of Conv2D(implicit) operation's param + */ struct Param { - int32_t ofm_index; + int32_t ofm_index; /**< Output format index */ - int32_t ifm_index; - int32_t ker_index; - int32_t bias_index; + int32_t ifm_index; /**< Input format index */ + int32_t ker_index; /**< Kernel index */ + int32_t bias_index; /**< Bias index */ - int32_t hstride_index; - int32_t vstride_index; + int32_t hstride_index; /**< Horizontal stride index */ + int32_t vstride_index; /**< Vertical stride index */ - int32_t padding_index; - int32_t activation_index; + int32_t padding_index; /**< Padding index */ + int32_t activation_index; /**< Activation index */ + /** + * @brief Construct a new Param object for Conv2D(implicit) as default + */ Param() = default; + + /** + * @brief Construct a new Param object for Conv2D(implicit) with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for Conv2D(implicit) + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for conv2D(implicit) with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy 
the Node object for conv2D(implicit) + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for conv2D(implicit) + * @return Parameters of conv2D(implicit) + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Function for accepting node for conv2D(implicit) + * @param [in] v Node visitor for invoking visit function of conv2D(implicit) + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/DepthToSpace.cc b/runtimes/pure_arm_compute/src/internal/op/DepthToSpace.cc new file mode 100644 index 000000000..db164a148 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/DepthToSpace.cc @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "internal/op/DepthToSpace.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace DepthToSpace +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace DepthToSpace +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace DepthToSpace +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 2 && outputCount == 1); + + output_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + // 1 -> Block size Index + input_index = inputs[0]; + block_size_index = inputs[1]; +} + +} // namespace DepthToSpace +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/DepthToSpace.h b/runtimes/pure_arm_compute/src/internal/op/DepthToSpace.h new file mode 100644 index 000000000..dd4c5c914 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/DepthToSpace.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __INTERNAL_OP_DEPTHTOSPACE_H__ +#define __INTERNAL_OP_DEPTHTOSPACE_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace DepthToSpace +{ + +struct Param +{ + int32_t output_index; + + int32_t input_index; + int32_t block_size_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace DepthToSpace +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_DEPTHTOSPACE_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h b/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h index 77ab4b63e..c63e30aae 100644 --- a/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h +++ b/runtimes/pure_arm_compute/src/internal/op/DepthwiseConv2D.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file DepthwiseConv2D.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::DepthwiseConv2D Param structs + * and internal::tflite::op::DepthwiseConv2D Node classes + */ #ifndef __INTERNAL_OP_DEPTHWISE_CONV_2D_H__ #define __INTERNAL_OP_DEPTHWISE_CONV_2D_H__ @@ -32,44 +38,75 @@ namespace DepthwiseConv2D namespace Explicit { +/** + * @brief Struct to have indexes for explicit padding DepthwiseConv2D operation parameter + */ struct Param { - int32_t ofm_index; + int32_t ofm_index; /**< Index of output feature map */ - int32_t ifm_index; - int32_t ker_index; - int32_t bias_index; + int32_t ifm_index; /**< Index of input feature map */ + int32_t ker_index; /**< Index of kernel */ + int32_t bias_index; /**< Index of bias */ - int32_t hstride_index; - int32_t vstride_index; + int32_t hstride_index; /**< Index of horizontal stride */ + int32_t vstride_index; /**< Index of vertical stride */ - int32_t padding_left_index; - int32_t padding_right_index; - int32_t padding_top_index; - int32_t padding_bottom_index; - - int32_t multipler_index; - int32_t activation_index; + int32_t padding_left_index; /**< Index of padding left */ + int32_t padding_right_index; /**< Index of padding right */ + int32_t padding_top_index; /**< Index of padding top */ + int32_t padding_bottom_index; /**< Index of padding bottom */ + int32_t multipler_index; /**< Index of multipler */ + int32_t activation_index; /**< Index of activation */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an explicit padding DepthwiseConv2D operation of data structure + */ class Node final : public 
op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @param[in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: @@ -78,43 +115,74 @@ private: } // namespace Explicit +/** + * @brief Struct to have indexes for implicit padding DepthwiseConv2D operation parameter + */ namespace Implicit { struct Param { - int32_t ofm_index; - - int32_t ifm_index; - int32_t ker_index; - int32_t bias_index; + int32_t ofm_index; /**< Index of output feature map */ - int32_t hstride_index; - int32_t vstride_index; + int32_t ifm_index; /**< Index of input feature map */ + int32_t ker_index; /**< Index of kernel */ + int32_t bias_index; /**< Index of bias */ - int32_t padding_index; - int32_t multipler_index; - int32_t activation_index; + int32_t hstride_index; /**< Index of horizontal stride */ + int32_t vstride_index; /**< Index of vertical stride */ + int32_t padding_index; /**< Index of padding */ + int32_t multipler_index; /**< Index of multipler */ + int32_t activation_index; /**< Index of activation */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an implicit padding DepthwiseConv2D operation of data structure + */ class Node final : public op::Node { public: 
+ /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @param[in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/Dequantize.h b/runtimes/pure_arm_compute/src/internal/op/Dequantize.h index b0645d136..f19898e9e 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Dequantize.h +++ b/runtimes/pure_arm_compute/src/internal/op/Dequantize.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file Dequantize.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::Dequantize::Param struct + * and internal::tflite::op::Dequantize::Node class + */ #ifndef __INTERNAL_OP_DEQUANTIZE_H__ #define __INTERNAL_OP_DEQUANTIZE_H__ @@ -30,31 +36,62 @@ namespace op namespace Dequantize { +/** + * @brief Struct to have indexes for Dequantize operation parameter + */ struct Param { - int32_t output_index; - - int32_t input_index; + int32_t output_index; /**< Index of output feature map */ + int32_t input_index; /**< Index of input feature map */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an Dequantize operation of data structure + */ class Node final : public op::Node { public: + /** + * 
@brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @param[in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/Div.h b/runtimes/pure_arm_compute/src/internal/op/Div.h index 06ed7ec21..d5fc09d19 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Div.h +++ b/runtimes/pure_arm_compute/src/internal/op/Div.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file Div.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::Div::Param struct + * and internal::tflite::op::Div::Node class + */ #ifndef __INTERNAL_OP_DIV_H__ #define __INTERNAL_OP_DIV_H__ @@ -30,33 +36,64 @@ namespace op namespace Div { +/** + * @brief Struct to have indexes for operation parameter + */ struct Param { - int32_t ofm_index; - - int32_t lhs_index; - int32_t rhs_index; - int32_t activation_index; + int32_t ofm_index; /**< Index of output feature map */ + int32_t lhs_index; /**< Index of lhs */ + int32_t rhs_index; /**< Index of rhs */ + int32_t activation_index; /**< Index of activation */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an operation of data structure + */ class Node final : public op::Node 
{ public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @param[in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/EmbeddingLookup.h b/runtimes/pure_arm_compute/src/internal/op/EmbeddingLookup.h index 4547f27c7..17e8485f7 100644 --- a/runtimes/pure_arm_compute/src/internal/op/EmbeddingLookup.h +++ b/runtimes/pure_arm_compute/src/internal/op/EmbeddingLookup.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file EmbeddingLookup.h + * @brief This file contains accept function and params for EmbeddingLookup operation + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_EMBEDDING_LOOKUP_H__ #define __INTERNAL_OP_EMBEDDING_LOOKUP_H__ @@ -30,32 +36,65 @@ namespace op namespace EmbeddingLookup { +/** + * @brief Struct of EmbeddingLookup operation's param + */ struct Param { - int32_t output_index; + int32_t output_index; /**< Output index */ - int32_t lookups_index; - int32_t values_index; + int32_t lookups_index; /**< Lookups index */ + int32_t values_index; /**< Values index */ + /** + * @brief Construct a new Param object for EmbeddingLookup as default + */ Param() = default; + + /** + * @brief Construct a new Param object for EmbeddingLookup with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * 
@brief Class to define operation node for EmbeddingLookup + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for EmbeddingLookup with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy the Node object for EmbeddingLookup + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for EmbeddingLookup + * @return Parameters of EmbeddingLookup + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Function for accepting node for EmbeddingLookup + * @param [in] v Node visitor for invoking visit function of EmbeddingLookup + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/Equal.cc b/runtimes/pure_arm_compute/src/internal/op/Equal.cc new file mode 100644 index 000000000..b9cccc6a9 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/Equal.cc @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "internal/op/Equal.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Equal +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace Equal +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Equal +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 2 && outputCount == 1); + + output_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> input1 Tensor Index + // 1 -> input2 Tensor Index + input1_index = inputs[0]; + input2_index = inputs[1]; +} + +} // namespace Equal +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/Equal.h b/runtimes/pure_arm_compute/src/internal/op/Equal.h new file mode 100644 index 000000000..78b9f846f --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/Equal.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __INTERNAL_OP_EQUAL_H__ +#define __INTERNAL_OP_EQUAL_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Equal +{ + +struct Param +{ + int32_t output_index; + + int32_t input1_index; + int32_t input2_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +} // namespace Equal +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Equal +{ +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace Equal +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_EQUAL_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/Exp.cc b/runtimes/pure_arm_compute/src/internal/op/Exp.cc new file mode 100644 index 000000000..6f1aa8f42 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/Exp.cc @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "internal/op/Exp.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Exp +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace Exp +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Exp +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 1 && outputCount == 1); + + ofm_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + ifm_index = inputs[0]; +} + +} // namespace Exp +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/Exp.h b/runtimes/pure_arm_compute/src/internal/op/Exp.h new file mode 100644 index 000000000..ac7f244b7 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/Exp.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __INTERNAL_OP_EXP_H__ +#define __INTERNAL_OP_EXP_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Exp +{ + +struct Param +{ + int32_t ofm_index; + + int32_t ifm_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace Exp +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_EXP_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/Floor.h b/runtimes/pure_arm_compute/src/internal/op/Floor.h index 8cf2a841c..5264ec10c 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Floor.h +++ b/runtimes/pure_arm_compute/src/internal/op/Floor.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file Floor.h + * @brief This file contains accept function and params for Floor operation + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_FLOOR_H__ #define __INTERNAL_OP_FLOOR_H__ @@ -30,31 +36,64 @@ namespace op namespace Floor { +/** + * @brief Struct of Floor operation's param + */ struct Param { - int32_t output_index; + int32_t output_index; /**< Output index */ - int32_t input_index; + int32_t input_index; /**< Input index */ + /** + * @brief Construct a new Param object for Floor as default + */ Param() = default; + + /** + * @brief Construct a new Param object for Floor with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for Floor + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for Floor with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy the Node object for Floor + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for Floor + * @return Parameters of Floor + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Function for accepting node for Floor + * @param [in] v Node visitor for invoking visit function of Floor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/FullyConnected.h b/runtimes/pure_arm_compute/src/internal/op/FullyConnected.h index 7a425a6af..434308435 100644 --- a/runtimes/pure_arm_compute/src/internal/op/FullyConnected.h +++ b/runtimes/pure_arm_compute/src/internal/op/FullyConnected.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file FullyConnected.h + * @brief This file contains accept function and params for FullyConnected operation + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_FULLY_CONNTECTED_H__ #define __INTERNAL_OP_FULLY_CONNTECTED_H__ @@ -30,34 +36,70 @@ namespace op namespace FullyConnected { +/** + * @brief Struct of FullyConnected operation's param + */ struct Param { - int32_t output_index; + int32_t output_index; /**< Output index */ - int32_t input_index; - int32_t weight_index; - int32_t bias_index; - int32_t activation_index; + int32_t input_index; /**< Input index */ + int32_t weight_index; /**< Weight index */ + int32_t bias_index; /**< Bias index */ + int32_t activation_index; /**< Activation index */ + /** + * @brief Construct a new Param object for FullyConnected as default + */ Param() = default; + + /** + * @brief Construct a new Param object for FullyConnected with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for FullyConnected + */ class Node final : public op::Node { + /** + * @brief Construct a new Node object for FullyConnected with param + * @param [in] param Parameters for Node + */ public: + /** + * @brief Destroy the Node object for FullyConnected + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy the Node object for FullyConnected + */ virtual ~Node() = default; public: + /** + * @brief Parameter Get parameters for FullyConnected + * @return _param Parameters of FullyConnected + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Function for accepting node for FullyConnected + * @param [in] v Node visitor for invoking visit function of FullyConnected + * @return N/A + 
*/ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/Gather.h b/runtimes/pure_arm_compute/src/internal/op/Gather.h index 5f7fe956f..4470236eb 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Gather.h +++ b/runtimes/pure_arm_compute/src/internal/op/Gather.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file Gather.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines Gather operation + */ + #ifndef __INTERNAL_OP_GATHER_H__ #define __INTERNAL_OP_GATHER_H__ @@ -30,37 +36,69 @@ namespace op namespace Gather { +/** + * @brief Struct to manipulate parameter for Gather operation + */ struct Param { - int32_t ofm_index; // output + int32_t ofm_index; //!< index for output feature map - int32_t lhs_index; // input - int32_t rhs_index; // indexes - int32_t axis_index; // axis + int32_t lhs_index; //!< index for lhs tensor + int32_t rhs_index; //!< index for rhs tensor + int32_t axis_index; //!< index for axis + /** + * @brief Default Constructor + */ Param() = default; + /** + * @brief Construct a new Param object + * @param[in] inputCount the number of inputs + * @param[in] inputs pointer for input data + * @param[in] outputCount the number of outputs + * @param[in] outputs pointer for input data + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define Gather Operation + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Tanh Node object + * @param param Parameter for Tanh Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Default Destructor + */ virtual ~Node() = default; public: + /** + * @brief Get parameter + * @return Param reference + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Accept a NodeVisitor so that it can visit this node + * @param [in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) 
const override; private: - const Param _param; + const Param _param; //!< parameter for Gather node }; } // namespace Gather diff --git a/runtimes/pure_arm_compute/src/internal/op/HashtableLookup.cc b/runtimes/pure_arm_compute/src/internal/op/HashtableLookup.cc index 30a853a64..7e04ecf82 100644 --- a/runtimes/pure_arm_compute/src/internal/op/HashtableLookup.cc +++ b/runtimes/pure_arm_compute/src/internal/op/HashtableLookup.cc @@ -1,52 +1,68 @@ -#include "internal/op/HashtableLookup.h"
-#include "internal/op/NodeVisitor.h"
-
-#include <cassert>
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace HashtableLookup
-{
-
-void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
-
-} // namespace HashtableLookup
-} // namespace op
-} // namespace tflite
-} // namespace internal
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace HashtableLookup
-{
-
-Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
- const uint32_t *outputs)
-{
- assert(inputCount == 3 && outputCount == 2);
-
- output_index = outputs[0];
- hits_index = outputs[1];
-
- // Each input should be interpreted as follows:
- //
- // 0 -> Lookups Index
- // 1 -> Keys Index
- // 2 -> Values Index
- lookups_index = inputs[0];
- keys_index = inputs[1];
- values_index = inputs[2];
-}
-
-} // namespace HashtableLookup
-} // namespace op
-} // namespace tflite
-} // namespace internal
+/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/op/HashtableLookup.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace HashtableLookup +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace HashtableLookup +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace HashtableLookup +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 3 && outputCount == 2); + + output_index = outputs[0]; + hits_index = outputs[1]; + + // Each input should be interpreted as follows: + // + // 0 -> Lookups Index + // 1 -> Keys Index + // 2 -> Values Index + lookups_index = inputs[0]; + keys_index = inputs[1]; + values_index = inputs[2]; +} + +} // namespace HashtableLookup +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/HashtableLookup.h b/runtimes/pure_arm_compute/src/internal/op/HashtableLookup.h index 192da2aae..a5b43d1c7 100644 --- a/runtimes/pure_arm_compute/src/internal/op/HashtableLookup.h +++ b/runtimes/pure_arm_compute/src/internal/op/HashtableLookup.h @@ -1,56 +1,109 @@ -#ifndef __INTERNAL_OP_HASHTABLE_LOOKUP_H__
-#define __INTERNAL_OP_HASHTABLE_LOOKUP_H__
-
-#include "internal/op/Node.h"
-
-#include <cstdint>
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace HashtableLookup
-{
-
-struct Param
-{
- int32_t output_index;
- int32_t hits_index;
-
- int32_t lookups_index;
- int32_t values_index;
- int32_t keys_index;
-
- Param() = default;
- Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
-};
-
-class Node final : public op::Node
-{
-public:
- Node(const Param ¶m) : _param(param)
- {
- // DO NOTHING
- }
-
-public:
- virtual ~Node() = default;
-
-public:
- const Param ¶m(void) const { return _param; }
-
-public:
- void accept(NodeVisitor &&) const override;
-
-private:
- const Param _param;
-};
-
-} // namespace HashtableLookup
-} // namespace op
-} // namespace tflite
-} // namespace internal
-
-#endif // __INTERNAL_OP_HASHTABLE_LOOKUP_H__
+/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file HashtableLookup.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::HashtableLookup::Param struct + * and internal::tflite::op::HashtableLookup::Node class + */ +#ifndef __INTERNAL_OP_HASHTABLE_LOOKUP_H__ +#define __INTERNAL_OP_HASHTABLE_LOOKUP_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace HashtableLookup +{ + +/** + * @brief Struct to have indexes for operation parameter + */ +struct Param +{ + int32_t output_index; /**< Index of output feature map */ + int32_t hits_index; /**< Index of hits */ + + int32_t lookups_index; /**< Index of lookups */ + int32_t values_index; /**< Index of values */ + int32_t keys_index; /**< Index of keys */ + /** + * @brief Construct as default + */ + Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +/** + * @brief Class to represent an operation of data structure + */ +class Node final : public op::Node +{ +public: + /** + * @brief Construct a new Node object with 
param + * @param[in] param Param object that makes up a Node + */ + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + /** + * @brief Destruct as default + */ + virtual ~Node() = default; + +public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ + const Param ¶m(void) const { return _param; } + +public: + /** + * @brief Visit this Node by NodeVisitor + * @param[in] v Visitor + * @return N/A + */ + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace HashtableLookup +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_HASHTABLE_LOOKUP_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/L2Normalization.cc b/runtimes/pure_arm_compute/src/internal/op/L2Normalization.cc index 449540178..44a6ee63d 100644 --- a/runtimes/pure_arm_compute/src/internal/op/L2Normalization.cc +++ b/runtimes/pure_arm_compute/src/internal/op/L2Normalization.cc @@ -1,3 +1,19 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + #include "internal/op/L2Normalization.h" #include "internal/op/NodeVisitor.h" diff --git a/runtimes/pure_arm_compute/src/internal/op/L2Normalization.h b/runtimes/pure_arm_compute/src/internal/op/L2Normalization.h index 70fa2fb7e..2e94fac11 100644 --- a/runtimes/pure_arm_compute/src/internal/op/L2Normalization.h +++ b/runtimes/pure_arm_compute/src/internal/op/L2Normalization.h @@ -1,3 +1,25 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file L2Normalization.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::L2Normalization::Param struct + * and internal::tflite::op::L2Normalization::Node class + */ #ifndef __INTERNAL_OP_L2_NORMALIZATION_H__ #define __INTERNAL_OP_L2_NORMALIZATION_H__ @@ -14,31 +36,62 @@ namespace op namespace L2Normalization { +/** + * @brief Struct to have indexes for L2Normalization operation parameter + */ struct Param { - int32_t ofm_index; - - int32_t ifm_index; + int32_t ofm_index; /**< Index of output feature map */ + int32_t ifm_index; /**< Index of input feature map */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an L2Normalization operation of data structure + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @param[in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/L2Pool2D.cc b/runtimes/pure_arm_compute/src/internal/op/L2Pool2D.cc index 73c1bb65c..64041ab49 100644 --- a/runtimes/pure_arm_compute/src/internal/op/L2Pool2D.cc +++ b/runtimes/pure_arm_compute/src/internal/op/L2Pool2D.cc @@ -1,124 +1,124 @@ -/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "internal/op/L2Pool2D.h"
-#include "internal/op/NodeVisitor.h"
-
-#include <cassert>
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace L2Pool2D
-{
-namespace Explicit
-{
-
-void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
-
-} // namespace Explicit
-
-namespace Implicit
-{
-
-void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
-
-} // namespace Implicit
-} // namespace L2Pool2D
-} // namespace op
-} // namespace tflite
-} // namespace internal
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace L2Pool2D
-{
-namespace Explicit
-{
-
-Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
- const uint32_t *outputs)
-{
- assert(inputCount == 10 && outputCount == 1);
-
- ofm_index = outputs[0];
-
- // Each input should be interpreted as follows:
- //
- // 0 -> IFM Tensor Index
- // 1 -> Padding_left index
- // 2 -> Padding_right index
- // 3 -> Padding_top index
- // 4 -> Padding_bottom index
- // 5 -> Horizontal (over width) Stride Index
- // 6 -> Vertial (over height) Stride Index
- // 7 -> Filter Width Index
- // 8 -> Filter Height Index
- // 9 -> FuseCode (activation) Index
- ifm_index = inputs[0];
- padding_left_index = inputs[1];
- padding_right_index = inputs[2];
- padding_top_index = inputs[3];
- padding_bottom_index = inputs[4];
- hstride_index = inputs[5];
- vstride_index = inputs[6];
- kw_index = inputs[7];
- kh_index = inputs[8];
- activation_index = inputs[9];
-}
-
-} // namespace Explicit
-
-namespace Implicit
-{
-
-Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
- const uint32_t *outputs)
-{
- assert(inputCount == 7 && outputCount == 1);
-
- ofm_index = outputs[0];
-
- // Each input should be interpreted as follows:
- //
- // 0 -> IFM Tensor Index
- // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
- // 2 -> Horizontal (over width) Stride Index
- // 3 -> Vertial (over height) Stride Index
- // 4 -> Filter Width Index
- // 5 -> Filter Height Index
- // 6 -> FuseCode (activation) Index
- ifm_index = inputs[0];
- padding_index = inputs[1];
- hstride_index = inputs[2];
- vstride_index = inputs[3];
- kw_index = inputs[4];
- kh_index = inputs[5];
- activation_index = inputs[6];
-}
-
-} // namespace Implicit
-} // namespace L2Pool2D
-} // namespace op
-} // namespace tflite
-} // namespace internal
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "internal/op/L2Pool2D.h"
+#include "internal/op/NodeVisitor.h"
+
+#include <cassert>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace L2Pool2D
+{
+namespace Explicit
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Explicit
+
+namespace Implicit
+{
+
+void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
+
+} // namespace Implicit
+} // namespace L2Pool2D
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace L2Pool2D
+{
+namespace Explicit
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+             const uint32_t *outputs)
+{
+  assert(inputCount == 10 && outputCount == 1);
+
+  ofm_index = outputs[0];
+
+  // Each input should be interpreted as follows:
+  //
+  // 0 -> IFM Tensor Index
+  // 1 -> Padding_left index
+  // 2 -> Padding_right index
+  // 3 -> Padding_top index
+  // 4 -> Padding_bottom index
+  // 5 -> Horizontal (over width) Stride Index
+  // 6 -> Vertial (over height) Stride Index
+  // 7 -> Filter Width Index
+  // 8 -> Filter Height Index
+  // 9 -> FuseCode (activation) Index
+  ifm_index = inputs[0];
+  padding_left_index = inputs[1];
+  padding_right_index = inputs[2];
+  padding_top_index = inputs[3];
+  padding_bottom_index = inputs[4];
+  hstride_index = inputs[5];
+  vstride_index = inputs[6];
+  kw_index = inputs[7];
+  kh_index = inputs[8];
+  activation_index = inputs[9];
+}
+
+} // namespace Explicit
+
+namespace Implicit
+{
+
+Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
+             const uint32_t *outputs)
+{
+  assert(inputCount == 7 && outputCount == 1);
+
+  ofm_index = outputs[0];
+
+  // Each input should be interpreted as follows:
+  //
+  // 0 -> IFM Tensor Index
+  // 1 -> Padding Code (ANEURALNETWORKS_PADDING_SAME or ANEURALNETWORKS_PADDING_VALID) Index
+  // 2 -> Horizontal (over width) Stride Index
+  // 3 -> Vertial (over height) Stride Index
+  // 4 -> Filter Width Index
+  // 5 -> Filter Height Index
+  // 6 -> FuseCode (activation) Index
+  ifm_index = inputs[0];
+  padding_index = inputs[1];
+  hstride_index = inputs[2];
+  vstride_index = inputs[3];
+  kw_index = inputs[4];
+  kh_index = inputs[5];
+  activation_index = inputs[6];
+}
+
+} // namespace Implicit
+} // namespace L2Pool2D
+} // namespace op
+} // namespace tflite
+} // namespace internal
diff --git a/runtimes/pure_arm_compute/src/internal/op/L2Pool2D.h b/runtimes/pure_arm_compute/src/internal/op/L2Pool2D.h
index f4a25539b..facb223c7 100644
--- a/runtimes/pure_arm_compute/src/internal/op/L2Pool2D.h
+++ b/runtimes/pure_arm_compute/src/internal/op/L2Pool2D.h
@@ -1,130 +1,198 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __INTERNAL_OP_L2_POOL_2D_H__
-#define __INTERNAL_OP_L2_POOL_2D_H__
-
-#include "internal/op/Node.h"
-
-#include <cstdint>
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace L2Pool2D
-{
-namespace Explicit
-{
-
-struct Param
-{
- int32_t ofm_index;
-
- int32_t ifm_index;
-
- int32_t kw_index;
- int32_t kh_index;
-
- int32_t hstride_index;
- int32_t vstride_index;
-
- int32_t padding_left_index;
- int32_t padding_right_index;
- int32_t padding_top_index;
- int32_t padding_bottom_index;
-
- int32_t activation_index;
-
- Param() = default;
- Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
-};
-
-class Node final : public op::Node
-{
-public:
- Node(const Param &param) : _param(param)
- {
- // DO NOTHING
- }
-
-public:
- virtual ~Node() = default;
-
-public:
- const Param &param(void) const { return _param; }
-
-public:
- void accept(NodeVisitor &&) const override;
-
-private:
- const Param _param;
-};
-
-} // namespace Explicit
-
-namespace Implicit
-{
-
-struct Param
-{
- int32_t ofm_index;
-
- int32_t ifm_index;
-
- int32_t kw_index;
- int32_t kh_index;
-
- int32_t hstride_index;
- int32_t vstride_index;
-
- int32_t padding_index;
- int32_t activation_index;
-
- Param() = default;
- Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
-};
-
-class Node final : public op::Node
-{
-public:
- Node(const Param &param) : _param(param)
- {
- // DO NOTHING
- }
-
-public:
- virtual ~Node() = default;
-
-public:
- const Param &param(void) const { return _param; }
-
-public:
- void accept(NodeVisitor &&) const override;
-
-private:
- const Param _param;
-};
-
-} // namespace Implicit
-} // namespace L2Pool2D
-} // namespace op
-} // namespace tflite
-} // namespace internal
-
-#endif // __INTERNAL_OP_L2_POOL_2D_H__
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file L2Pool2D.h
+ * @ingroup COM_AI_RUNTIME
+ * @brief This file defines internal::tflite::op::L2Pool2D Param structs
+ * and internal::tflite::op::L2Pool2D Node classes
+ */
+#ifndef __INTERNAL_OP_L2_POOL_2D_H__
+#define __INTERNAL_OP_L2_POOL_2D_H__
+
+#include "internal/op/Node.h"
+
+#include <cstdint>
+
+namespace internal
+{
+namespace tflite
+{
+namespace op
+{
+namespace L2Pool2D
+{
+namespace Explicit
+{
+
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
+struct Param
+{
+  int32_t ofm_index; /**< Index of output feature map */
+
+  int32_t ifm_index; /**< Index of input feature map */
+
+  int32_t kw_index; /**< Index of kernel width */
+  int32_t kh_index; /**< Index of kernel height */
+
+  int32_t hstride_index; /**< Index of horizontal stride */
+  int32_t vstride_index; /**< Index of vertical stride */
+
+  int32_t padding_left_index; /**< Index of padding left */
+  int32_t padding_right_index; /**< Index of padding right */
+  int32_t padding_top_index; /**< Index of padding top */
+  int32_t padding_bottom_index; /**< Index of padding bottom */
+
+  int32_t activation_index; /**< Index of activation */
+  /**
+   * @brief Construct as default
+   */
+  Param() = default;
+  /**
+   * @brief Construct a new Param object with params
+   * @param[in] inputCount Count of inputs
+   * @param[in] inputs Pointer of inputs
+   * @param[in] outputCount Count of outputs
+   * @param[in] outputs Pointer of outputs
+   */
+  Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent an operation of data structure
+ */
+class Node final : public op::Node
+{
+public:
+  /**
+   * @brief Construct a new Node object with param
+   * @param[in] param Param object that makes up a Node
+   */
+  Node(const Param &param) : _param(param)
+  {
+    // DO NOTHING
+  }
+
+public:
+  /**
+   * @brief Destruct as default
+   */
+  virtual ~Node() = default;
+
+public:
+  /**
+   * @brief Get a reference of Param object
+   * @return Reference of Param object
+   */
+  const Param &param(void) const { return _param; }
+
+public:
+  /**
+   * @brief Visit this Node by NodeVisitor
+   * @param[in] v Visitor
+   * @return N/A
+   */
+  void accept(NodeVisitor &&) const override;
+
+private:
+  const Param _param;
+};
+
+} // namespace Explicit
+
+namespace Implicit
+{
+
+/**
+ * @brief Struct to have indexes for operation parameter
+ */
+struct Param
+{
+  int32_t ofm_index; /**< Index of output feature map */
+
+  int32_t ifm_index; /**< Index of input feature map */
+
+  int32_t kw_index; /**< Index of kernel width */
+  int32_t kh_index; /**< Index of kernel height */
+
+  int32_t hstride_index; /**< Index of horizontal stride */
+  int32_t vstride_index; /**< Index of vertical stride */
+
+  int32_t padding_index; /**< Index of padding */
+  int32_t activation_index; /**< Index of activation */
+  /**
+   * @brief Construct as default
+   */
+  Param() = default;
+  /**
+   * @brief Construct a new Param object with params
+   * @param[in] inputCount Count of inputs
+   * @param[in] inputs Pointer of inputs
+   * @param[in] outputCount Count of outputs
+   * @param[in] outputs Pointer of outputs
+   */
+  Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
+};
+
+/**
+ * @brief Class to represent an operation of data structure
+ */
+class Node final : public op::Node
+{
+public:
+  /**
+   * @brief Construct a new Node object with param
+   * @param[in] param Param object that makes up a Node
+   */
+  Node(const Param &param) : _param(param)
+  {
+    // DO NOTHING
+  }
+
+public:
+  /**
+   * @brief Destruct as default
+   */
+  virtual ~Node() = default;
+
+public:
+  /**
+   * @brief Get a reference of Param object
+   * @return Reference of Param object
+   */
+  const Param &param(void) const { return _param; }
+
+public:
+  /**
+   * @brief Visit this Node by NodeVisitor
+   * @param[in] v Visitor
+   * @return N/A
+   */
+  void accept(NodeVisitor &&) const override;
+
+private:
+  const Param _param;
+};
+
+} // namespace Implicit
+} // namespace L2Pool2D
+} // namespace op
+} // namespace tflite
+} // namespace internal
+
+#endif // __INTERNAL_OP_L2_POOL_2D_H__
diff --git a/runtimes/pure_arm_compute/src/internal/op/LocalResponseNormalization.cc b/runtimes/pure_arm_compute/src/internal/op/LocalResponseNormalization.cc
new file mode 100644
index 000000000..b7419d923
--- /dev/null
+++ b/runtimes/pure_arm_compute/src/internal/op/LocalResponseNormalization.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "internal/op/LocalResponseNormalization.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LocalResponseNormalization +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace LocalResponseNormalization +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LocalResponseNormalization +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 5 && outputCount == 1); + + ofm_index = outputs[0]; + + ifm_index = inputs[0]; + radius_index = inputs[1]; + bias_index = inputs[2]; + alpha_index = inputs[3]; + beta_index = inputs[4]; +} + +} // namespace LocalResponseNormalization +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/LocalResponseNormalization.h b/runtimes/pure_arm_compute/src/internal/op/LocalResponseNormalization.h new file mode 100644 index 000000000..29e0699ad --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/LocalResponseNormalization.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __INTERNAL_OP_LOCAL_RESPONSE_NORMALIZATION_H__ +#define __INTERNAL_OP_LOCAL_RESPONSE_NORMALIZATION_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LocalResponseNormalization +{ + +struct Param +{ + int32_t ofm_index; + + int32_t ifm_index; + int32_t radius_index; + int32_t bias_index; + int32_t alpha_index; + int32_t beta_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace LocalResponseNormalization +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_LOCAL_RESPONSE_NORMALIZATION_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/LogicalAnd.cc b/runtimes/pure_arm_compute/src/internal/op/LogicalAnd.cc new file mode 100644 index 000000000..5b7da4d3b --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/LogicalAnd.cc @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "internal/op/LogicalAnd.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LogicalAnd +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace LogicalAnd +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LogicalAnd +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 2 && outputCount == 1); + + output_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> input1 Tensor Index + // 1 -> input2 Tensor Index + input1_index = inputs[0]; + input2_index = inputs[1]; +} + +} // namespace LogicalAnd +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/LogicalAnd.h b/runtimes/pure_arm_compute/src/internal/op/LogicalAnd.h new file mode 100644 index 000000000..2f53f756d --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/LogicalAnd.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __INTERNAL_OP_LOGICAL_AND_H__ +#define __INTERNAL_OP_LOGICAL_AND_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LogicalAnd +{ + +struct Param +{ + int32_t output_index; + + int32_t input1_index; + int32_t input2_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +} // namespace LogicalAnd +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LogicalAnd +{ +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace LogicalAnd +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_LOGICAL_AND_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/LogicalNot.cc b/runtimes/pure_arm_compute/src/internal/op/LogicalNot.cc new file mode 100644 index 000000000..4cb6a8e2a --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/LogicalNot.cc @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "internal/op/LogicalNot.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LogicalNot +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace LogicalNot +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LogicalNot +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 1 && outputCount == 1); + + output_index = outputs[0]; + + input_index = inputs[0]; +} + +} // namespace LogicalNot +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/LogicalNot.h b/runtimes/pure_arm_compute/src/internal/op/LogicalNot.h new file mode 100644 index 000000000..9593deafe --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/LogicalNot.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __INTERNAL_OP_LOGICAL_NOT_H__ +#define __INTERNAL_OP_LOGICAL_NOT_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LogicalNot +{ + +struct Param +{ + int32_t output_index; + + int32_t input_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +} // namespace LogicalNot +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LogicalNot +{ +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace LogicalNot +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_LOGICAL_NOT_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/LogicalOr.cc b/runtimes/pure_arm_compute/src/internal/op/LogicalOr.cc new file mode 100644 index 000000000..8295f6f0b --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/LogicalOr.cc @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "internal/op/LogicalOr.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LogicalOr +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace LogicalOr +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LogicalOr +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 2 && outputCount == 1); + + output_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> input1 Tensor Index + // 1 -> input2 Tensor Index + input1_index = inputs[0]; + input2_index = inputs[1]; +} + +} // namespace LogicalOr +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/LogicalOr.h b/runtimes/pure_arm_compute/src/internal/op/LogicalOr.h new file mode 100644 index 000000000..6487fa720 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/LogicalOr.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __INTERNAL_OP_LOGICAL_OR_H__ +#define __INTERNAL_OP_LOGICAL_OR_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LogicalOr +{ + +struct Param +{ + int32_t output_index; + + int32_t input1_index; + int32_t input2_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +} // namespace LogicalOr +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace LogicalOr +{ +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace LogicalOr +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_LOGICAL_OR_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/Logistic.h b/runtimes/pure_arm_compute/src/internal/op/Logistic.h index db8935846..a42fdc0d4 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Logistic.h +++ b/runtimes/pure_arm_compute/src/internal/op/Logistic.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file Logistic.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::Logistic::Param struct + * and internal::tflite::op::Logistic::Node class + */ #ifndef __INTERNAL_OP_LOGISTIC_H__ #define __INTERNAL_OP_LOGISTIC_H__ @@ -30,31 +36,61 @@ namespace op namespace Logistic { +/** + * @brief Struct to have indexes for operation parameter + */ struct Param { - int32_t ofm_index; - - int32_t ifm_index; + int32_t ofm_index; /**< Index of output feature map */ + int32_t ifm_index; /**< Index of input feature map */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an operation of data structure + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/Lstm.h b/runtimes/pure_arm_compute/src/internal/op/Lstm.h index 056ac2ea7..f51f0402a 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Lstm.h +++ b/runtimes/pure_arm_compute/src/internal/op/Lstm.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file Lstm.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::LSTM::Param struct + * and internal::tflite::op::LSTM::Node class + */ #ifndef __INTERNAL_OP_LSTM_H__ #define __INTERNAL_OP_LSTM_H__ @@ -30,56 +36,87 @@ namespace op namespace LSTM { +/** + * @brief Struct to have indexes for operation parameter + */ struct Param { - int32_t scratch_buffer_index; - int32_t output_state_out_index; - int32_t cell_state_out_index; - int32_t output_index; + int32_t scratch_buffer_index; /**< Index of scartch buffer */ + int32_t output_state_out_index; /**< Index of output state out */ + int32_t cell_state_out_index; /**< Index of cell state out */ + int32_t output_index; /**< Index of output */ - int32_t input_index; - int32_t input_to_input_weights_index; - int32_t input_to_forget_weights_index; - int32_t input_to_cell_weights_index; - int32_t input_to_output_weights_index; - int32_t recurrent_to_input_weights_index; - int32_t recurrent_to_forget_weights_index; - int32_t recurrent_to_cell_weights_index; - int32_t recurrent_to_output_weights_index; - int32_t cell_to_input_weights_index; - int32_t cell_to_forget_weights_index; - int32_t cell_to_output_weights_index; - int32_t input_gate_bias_index; - int32_t forget_gate_bias_index; - int32_t cell_bias_index; - int32_t output_gate_bias_index; - int32_t projection_weights_index; - int32_t projection_bias_index; - int32_t output_state_in_index; - int32_t cell_state_in_index; - int32_t activation_index; - int32_t cell_threshold_index; - int32_t projection_threshold_index; + int32_t input_index; /**< Index of input */ + int32_t input_to_input_weights_index; /**< Index of input to input weights */ + int32_t input_to_forget_weights_index; /**< Index of input to forget weights */ + int32_t input_to_cell_weights_index; /**< Index of input to cell weights */ + int32_t input_to_output_weights_index; /**< Index of input to output weights */ + int32_t recurrent_to_input_weights_index; /**< Index 
of recurrent to input weights */ + int32_t recurrent_to_forget_weights_index; /**< Index of recurrent to forget weights */ + int32_t recurrent_to_cell_weights_index; /**< Index of recurrent to cell weights */ + int32_t recurrent_to_output_weights_index; /**< Index of recurrent to output weights */ + int32_t cell_to_input_weights_index; /**< Index of cell to input weights */ + int32_t cell_to_forget_weights_index; /**< Index of cell to forget weights */ + int32_t cell_to_output_weights_index; /**< Index of cell to output weights */ + int32_t input_gate_bias_index; /**< Index of input gate bias */ + int32_t forget_gate_bias_index; /**< Index of forget gate bias */ + int32_t cell_bias_index; /**< Index of cell bias */ + int32_t output_gate_bias_index; /**< Index of output gate bias */ + int32_t projection_weights_index; /**< Index of projection weights */ + int32_t projection_bias_index; /**< Index of projection bias */ + int32_t output_state_in_index; /**< Index of output state in */ + int32_t cell_state_in_index; /**< Index of cell state in */ + int32_t activation_index; /**< Index of activation */ + int32_t cell_threshold_index; /**< Index of cell threshold */ + int32_t projection_threshold_index; /**< Index of projection threshold */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an operation of data structure + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual 
~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/MaxPool2D.h b/runtimes/pure_arm_compute/src/internal/op/MaxPool2D.h index d5da17d66..329ccecb7 100644 --- a/runtimes/pure_arm_compute/src/internal/op/MaxPool2D.h +++ b/runtimes/pure_arm_compute/src/internal/op/MaxPool2D.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file MaxPool2D.h + * @brief This file contains accept function and params for MaxPool2D operation + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_MAX_POOL_2D_H__ #define __INTERNAL_OP_MAX_POOL_2D_H__ @@ -32,44 +38,77 @@ namespace MaxPool2D namespace Explicit { +/** + * @brief Struct of MaxPool2D(Explicit) operation's param + */ struct Param { - int32_t ofm_index; + int32_t ofm_index; /**< Output format index */ - int32_t ifm_index; + int32_t ifm_index; /**< Input format index */ - int32_t kw_index; - int32_t kh_index; + int32_t kw_index; /**< Kernel width index */ + int32_t kh_index; /**< Kernel height index */ - int32_t hstride_index; - int32_t vstride_index; + int32_t hstride_index; /**< Horizontal stride index */ + int32_t vstride_index; /**< Vertical stride index */ - int32_t padding_left_index; - int32_t padding_right_index; - int32_t padding_top_index; - int32_t padding_bottom_index; + int32_t padding_left_index; /**< Left padding index */ + int32_t padding_right_index; /**< Right padding index */ + int32_t padding_top_index; /**< Top padding index */ + int32_t padding_bottom_index; /**< Bottom padding index */ - int32_t activation_index; + int32_t activation_index; /**< Activation index */ + /** + * @brief Construct a new Param object for MaxPool2D(Explicit) as default + */ Param() = default; + + /** + * @brief Construct a 
new Param object for MaxPool2D(Explicit) with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for MaxPool2D(Explicit) + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for MaxPool2D(Explicit) with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy the Node object for MaxPool2D(Explicit) + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for MaxPool2D(Explicit) + * @return Parameters of MaxPool2D(Explicit) + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Function for accepting node for MaxPool2D(Explicit) + * @param [in] v Node visitor for invoking visit function of MaxPool2D(Explicit) + * @return N/A + */ void accept(NodeVisitor &&) const override; private: @@ -81,40 +120,73 @@ private: namespace Implicit { +/** + * @brief Struct of MaxPool2D(Implicit) operation's param + */ struct Param { - int32_t ofm_index; + int32_t ofm_index; /**< Output format index */ - int32_t ifm_index; + int32_t ifm_index; /**< Input format index */ - int32_t kw_index; - int32_t kh_index; + int32_t kw_index; /**< Kernel width index */ + int32_t kh_index; /**< Kernel height index */ - int32_t hstride_index; - int32_t vstride_index; + int32_t hstride_index; /**< Horizontal stride index */ + int32_t vstride_index; /**< Vertical stride index */ - int32_t padding_index; - int32_t activation_index; + int32_t padding_index; /**< Padding index */ + int32_t activation_index; /**< Activation index */ + /** + * @brief Construct a new Param object for MaxPool2D(Implicit) as default + */ Param() = default; + + /** + * 
@brief Construct a new Param object for MaxPool2D(Implicit) with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for MaxPool2D(Implicit) + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for MaxPool2D(Implicit) with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy the Node object for MaxPool2D(Implicit) + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for MaxPool2D(Implicit) + * @return Parameters of MaxPool2D(Implicit) + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Function for accepting node for MaxPool2D(Implicit) + * @param [in] v Node visitor for invoking visit function of MaxPool2D(Implicit) + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/Mean.h b/runtimes/pure_arm_compute/src/internal/op/Mean.h index 385b38dbf..f8e7ed308 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Mean.h +++ b/runtimes/pure_arm_compute/src/internal/op/Mean.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file Mean.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::Mean::Param struct + * and internal::tflite::op::Mean::Node class + */ #ifndef __INTERNAL_OP_MEAN_H__ #define __INTERNAL_OP_MEAN_H__ @@ -30,33 +36,64 @@ namespace op namespace Mean { +/** + * @brief Struct to have indexes for operation parameter + */ struct Param { - int32_t ofm_index; // output - - int32_t ifm_index; // input - int32_t axis_index; // axis - int32_t keep_dims_index; // keep_dims + int32_t ofm_index; /**< Index of output feature map */ // output + int32_t ifm_index; /**< Index of input feature map */ // input + int32_t axis_index; /**< Index of axis */ // axis + int32_t keep_dims_index; /**< Index of keep dims */ // keep_dims + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an operation of data structure + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @param[in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/Mul.h b/runtimes/pure_arm_compute/src/internal/op/Mul.h index ebb72c4be..9710dd057 100644 --- 
a/runtimes/pure_arm_compute/src/internal/op/Mul.h +++ b/runtimes/pure_arm_compute/src/internal/op/Mul.h @@ -14,6 +14,11 @@ * limitations under the License. */ +/** + * @file Mul.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::Mul class + */ #ifndef __INTERNAL_OP_MUL_H__ #define __INTERNAL_OP_MUL_H__ @@ -30,33 +35,63 @@ namespace op namespace Mul { +/** + * @brief Struct to have indexes for operation parameter + */ struct Param { - int32_t ofm_index; - - int32_t lhs_index; - int32_t rhs_index; - int32_t activation_index; + int32_t ofm_index; /** Index of output feature map */ + int32_t lhs_index; /** Index of lhs */ + int32_t rhs_index; /** Index of rhs */ + int32_t activation_index; /** Index of activation */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an operation of data structure + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/Neg.cc b/runtimes/pure_arm_compute/src/internal/op/Neg.cc new file mode 100644 index 000000000..72fecf484 --- /dev/null +++ 
b/runtimes/pure_arm_compute/src/internal/op/Neg.cc @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/op/Neg.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Neg +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace Neg +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Neg +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 1 && outputCount == 1); + + ofm_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + ifm_index = inputs[0]; +} + +} // namespace Neg +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/Neg.h b/runtimes/pure_arm_compute/src/internal/op/Neg.h new file mode 100644 index 000000000..77507df3d --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/Neg.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __INTERNAL_OP_NEG_H__ +#define __INTERNAL_OP_NEG_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Neg +{ + +struct Param +{ + int32_t ofm_index; + + int32_t ifm_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace Neg +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_NEG_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/Node.h b/runtimes/pure_arm_compute/src/internal/op/Node.h index 3927c20f0..be1cbdb5b 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Node.h +++ b/runtimes/pure_arm_compute/src/internal/op/Node.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file Node.h + * @brief This file contains struct of Node and NodeVisitor + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_NODE_H__ #define __INTERNAL_OP_NODE_H__ @@ -24,12 +30,26 @@ namespace tflite namespace op { +/** + * @brief Struct of operation NodeVisitor + */ struct NodeVisitor; +/** + * @brief Struct of operation Node + */ struct Node { + /** + * @brief Destroy the Node object for operation + */ virtual ~Node() = default; + /** + * @brief Function for accepting node for operation + * @param [in] v Node visitor for invoking visit function of operation + * @return N/A + */ virtual void accept(NodeVisitor &&) const = 0; }; diff --git a/runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h b/runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h index 6d8d10af0..0c1a4001d 100644 --- a/runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h +++ b/runtimes/pure_arm_compute/src/internal/op/NodeVisitor.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file NodeVisitor.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines NodeVisitor + */ + #ifndef __INTERNAL_OP_NODE_VISITOR_H__ #define __INTERNAL_OP_NODE_VISITOR_H__ @@ -26,6 +32,7 @@ #include "internal/op/Dequantize.h" #include "internal/op/MaxPool2D.h" #include "internal/op/AvgPool2D.h" +#include "internal/op/ArgMax.h" #include "internal/op/Concat.h" #include "internal/op/Reshape.h" #include "internal/op/ResizeBilinear.h" @@ -33,9 +40,11 @@ #include "internal/op/FullyConnected.h" #include "internal/op/Softmax.h" #include "internal/op/ReduceMax.h" +#include "internal/op/ReduceMin.h" #include "internal/op/Cast.h" #include "internal/op/TopKV2.h" #include "internal/op/Gather.h" +#include "internal/op/PReLU.h" #include "internal/op/ReLU.h" #include "internal/op/ReLU1.h" #include "internal/op/ReLU6.h" @@ -49,13 +58,30 @@ #include "internal/op/Floor.h" #include "internal/op/Split.h" #include "internal/op/RSQRT.h" +#include "internal/op/SQRT.h" #include "internal/op/Pad.h" 
#include "internal/op/SpaceToDepth.h" +#include "internal/op/SpaceToBatchND.h" #include "internal/op/L2Pool2D.h" #include "internal/op/EmbeddingLookup.h" #include "internal/op/HashtableLookup.h" #include "internal/op/L2Normalization.h" #include "internal/op/SquaredDifference.h" +#include "internal/op/LocalResponseNormalization.h" +#include "internal/op/DepthToSpace.h" +#include "internal/op/Unpack.h" +#include "internal/op/Neg.h" +#include "internal/op/Exp.h" +#include "internal/op/ReduceSum.h" +#include "internal/op/Equal.h" +#include "internal/op/BatchToSpaceNd.h" +#include "internal/op/TransposeConv.h" +#include "internal/op/Pack.h" +#include "internal/op/Abs.h" +#include "internal/op/NotEqual.h" +#include "internal/op/LogicalAnd.h" +#include "internal/op/LogicalNot.h" +#include "internal/op/LogicalOr.h" namespace internal { @@ -64,54 +90,400 @@ namespace tflite namespace op { +/** + * @brief Struct to define visitor for operation Nodes + */ struct NodeVisitor { + /** + * @brief Destruct NodeVisitor object with default + */ virtual ~NodeVisitor() = default; + /** + * @brief Visit an Add node + * @param[in] node Add node to visit + * @return N/A + */ virtual void visit(const Add::Node &) = 0; + /** + * @brief Visit a Mul node + * @param[in] node Mul node to visit + * @return N/A + */ virtual void visit(const Sub::Node &) = 0; + /** + * @brief Visit a Mul node + * @param[in] node Mul node to visit + * @return N/A + */ virtual void visit(const Mul::Node &) = 0; + /** + * @brief Visit a Div node + * @param[in] node Div node to visit + * @return N/A + */ virtual void visit(const Div::Node &) = 0; + /** + * @brief Visit a Conv2D node with implicit padding + * @param[in] node Conv2D node to visit + * @return N/A + */ virtual void visit(const Conv2D::Implicit::Node &) = 0; + /** + * @brief Visit a Conv2D node with explicit padding + * @param[in] node Conv2D node to visit + * @return N/A + */ virtual void visit(const Conv2D::Explicit::Node &) = 0; + /** + * @brief Visit 
a DepthwiseConv2D node with implicit padding + * @param[in] node DepthwiseConv2D node to visit + * @return N/A + */ virtual void visit(const DepthwiseConv2D::Implicit::Node &) = 0; + /** + * @brief Visit a DepthwiseConv2D node with explicit padding + * @param[in] node DepthwiseConv2D node to visit + * @return N/A + */ virtual void visit(const DepthwiseConv2D::Explicit::Node &) = 0; + /** + * @brief Visit a Dequantize node + * @param[in] node Dequantize node to visit + * @return N/A + */ virtual void visit(const Dequantize::Node &) = 0; + /** + * @brief Visit a MaxPool2D node with implicit padding + * @param[in] node MaxPool2D node to visit + * @return N/A + */ virtual void visit(const MaxPool2D::Implicit::Node &) = 0; + /** + * @brief Visit a MaxPool2D node with explicit padding + * @param[in] node MaxPool2D node to visit + * @return N/A + */ virtual void visit(const MaxPool2D::Explicit::Node &) = 0; + /** + * @brief Visit an AvgPool2D node with implicit padding + * @param[in] node AvgPool2D node to visit + * @return N/A + */ virtual void visit(const AvgPool2D::Implicit::Node &) = 0; + /** + * @brief Visit an AvgPool2D node with explicit padding + * @param[in] node AvgPool2D node to visit + * @return N/A + */ virtual void visit(const AvgPool2D::Explicit::Node &) = 0; + /** + * @brief Visit a Concat node + * @param[in] node Concat node to visit + * @return N/A + */ virtual void visit(const Concat::Node &) = 0; + /** + * @brief Visit a ArgMax node + * @param[in] node ArgMax node to visit + * @return N/A + */ + virtual void visit(const ArgMax::Node &) = 0; + /** + * @brief Visit an Reshape node + * @param[in] node Reshape node to visit + * @return N/A + */ virtual void visit(const Reshape::Node &) = 0; + /** + * @brief Visit an ResizeBilinear node + * @param[in] node ResizeBilinear node to visit + * @return N/A + */ virtual void visit(const ResizeBilinear::Node &) = 0; + /** + * @brief Visit a StridedSlice node + * @param[in] node StridedSlice node to visit + * 
@return N/A + */ virtual void visit(const StridedSlice::Node &) = 0; + /** + * @brief Visit a FullyConnected node + * @param[in] node FullyConnected node to visit + * @return N/A + */ virtual void visit(const FullyConnected::Node &) = 0; + /** + * @brief Visit a Softmax node + * @param[in] node Softmax node to visit + * @return N/A + */ virtual void visit(const Softmax::Node &) = 0; + /** + * @brief Visit a ReduceMax node + * @param[in] node ReduceMax node to visit + * @return N/A + */ virtual void visit(const ReduceMax::Node &) = 0; + /** + * @brief Visit a ReduceMin node + * @param[in] node ReduceMin node to visit + * @return N/A + */ + virtual void visit(const ReduceMin::Node &) = 0; + /** + * @brief Visit a Cast node + * @param[in] node Cast node to visit + * @return N/A + */ virtual void visit(const Cast::Node &) = 0; + /** + * @brief Visit a TopKV2 node + * @param[in] node TopKV2 node to visit + * @return N/A + */ virtual void visit(const TopKV2::Node &) = 0; + /** + * @brief Visit a Gather node + * @param[in] node Gather node to visit + * @return N/A + */ virtual void visit(const Gather::Node &) = 0; + /** + * @brief Visit an PReLU node + * @param[in] node PReLU node to visit + * @return N/A + */ + virtual void visit(const PReLU::Node &) = 0; + /** + * @brief Visit an ReLU node + * @param[in] node Relu node to visit + * @return N/A + */ virtual void visit(const ReLU::Node &) = 0; + /** + * @brief Visit a ReLU1 node + * @param[in] node ReLU1 node to visit + * @return N/A + */ virtual void visit(const ReLU1::Node &) = 0; + /** + * @brief Visit a ReLU6 node + * @param[in] node ReLU6 node to visit + * @return N/A + */ virtual void visit(const ReLU6::Node &) = 0; + /** + * @brief Visit a Tanh node + * @param[in] node Tanh node to visit + * @return N/A + */ virtual void visit(const Tanh::Node &) = 0; + /** + * @brief Visit a Squeeze node + * @param[in] node Squeeze node to visit + * @return N/A + */ virtual void visit(const Squeeze::Node &) = 0; + /** + * @brief 
Visit an Logistic node + * @param[in] node Logistic node to visit + * @return N/A + */ virtual void visit(const Logistic::Node &) = 0; + /** + * @brief Visit a Mean node + * @param[in] node Mean node to visit + * @return N/A + */ virtual void visit(const Mean::Node &) = 0; + /** + * @brief Visit an RNN node + * @param[in] node RNN node to visit + * @return N/A + */ virtual void visit(const RNN::Node &) = 0; + /** + * @brief Visit a Transpose node + * @param[in] node Transpose node to visit + * @return N/A + */ virtual void visit(const Transpose::Node &) = 0; + /** + * @brief Visit an LSTM node + * @param[in] node LSTM node to visit + * @return N/A + */ virtual void visit(const LSTM::Node &) = 0; + /** + * @brief Visit a Floor node + * @param[in] node Floor node to visit + * @return N/A + */ virtual void visit(const Floor::Node &) = 0; + /** + * @brief Visit a Split node + * @param[in] node Split node to visit + * @return N/A + */ virtual void visit(const Split::Node &) = 0; + /** + * @brief Visit an RSQRT node + * @param[in] node RSQRT node to visit + * @return N/A + */ virtual void visit(const RSQRT::Node &) = 0; + /** + * @brief Visit an SQRT node + * @param[in] node SQRT node to visit + * @return N/A + */ + virtual void visit(const SQRT::Node &) = 0; + /** + * @brief Visit a Pad node + * @param[in] node Pad node to visit + * @return N/A + */ virtual void visit(const Pad::Node &) = 0; + /** + * @brief Visit a SpaceToDepth node + * @param[in] node SpaceToDepth node to visit + * @return N/A + */ virtual void visit(const SpaceToDepth::Node &) = 0; + /** + * @brief Visit a SpaceToBatchND node + * @param[in] node SpaceToBatchND node to visit + * @return N/A + */ + virtual void visit(const SpaceToBatchND::Node &) = 0; + /** + * @brief Visit an L2Pool2D node with implicit padding + * @param[in] node L2Pool2D node to visit + * @return N/A + */ virtual void visit(const L2Pool2D::Implicit::Node &) = 0; + /** + * @brief Visit an L2Pool2D node with explicit padding + * 
@param[in] node L2Pool2D node to visit + * @return N/A + */ virtual void visit(const L2Pool2D::Explicit::Node &) = 0; + /** + * @brief Visit an EmbeddingLookup node + * @param[in] node EmbeddingLookup node to visit + * @return N/A + */ virtual void visit(const EmbeddingLookup::Node &) = 0; + /** + * @brief Visit a HashtableLookup node + * @param[in] node HashtableLookup node to visit + * @return N/A + */ virtual void visit(const HashtableLookup::Node &) = 0; + /** + * @brief Visit an L2Normalization node + * @param[in] node L2Normalization node to visit + * @return N/A + */ virtual void visit(const L2Normalization::Node &) = 0; + /** + * @brief Visit a SquaredDifference node + * @param[in] node SquaredDifference node to visit + * @return N/A + */ virtual void visit(const SquaredDifference::Node &) = 0; + /** + * @brief Visit a LocalResponseNormalization node + * @param[in] node LocalResponseNormalization node to visit + * @return N/A + */ + virtual void visit(const LocalResponseNormalization::Node &) = 0; + /** + * @brief Visit a DepthToSpace node + * @param[in] node DepthToSpace node to visit + * @return N/A + */ + virtual void visit(const DepthToSpace::Node &) = 0; + /** + * @brief Visit a Unpack node + * @param[in] node Unpack node to visit + * @return N/A + */ + virtual void visit(const Unpack::Node &) = 0; + /** + * @brief Visit a Neg node + * @param[in] node Neg node to visit + * @return N/A + */ + virtual void visit(const Neg::Node &) = 0; + /** + * @brief Visit a Exp node + * @param[in] node Exp node to visit + * @return N/A + */ + virtual void visit(const Exp::Node &) = 0; + /** + * @brief Visit a ReduceSum node + * @param[in] node ReduceSum node to visit + * @return N/A + */ + virtual void visit(const ReduceSum::Node &) = 0; + /** + * @brief Visit a Equal node + * @param[in] node Equal node to visit + * @return N/A + */ + virtual void visit(const Equal::Node &) = 0; + /** + * @brief Visit a BatchToSpaceNd node + * @param[in] node BatchToSpaceNd node to 
visit + * @return N/A + */ + virtual void visit(const BatchToSpaceNd::Node &) = 0; + /** + * @brief Visit a TransposeConv node + * @param[in] node TransposeConv node to visit + * @return N/A + */ + virtual void visit(const TransposeConv::Node &) = 0; + /** + * @brief Visit a Pack node + * @param[in] node Pack node to visit + * @return N/A + */ + virtual void visit(const Pack::Node &) = 0; + /** + * @brief Visit a Abs node + * @param[in] node Abs node to visit + * @return N/A + */ + virtual void visit(const Abs::Node &) = 0; + /** + * @brief Visit a NotEqual node + * @param[in] node NotEqual node to visit + * @return N/A + */ + virtual void visit(const NotEqual::Node &) = 0; + /** + * @brief Visit a LogicalAnd node + * @param[in] node LogicalAnd node to visit + * @return N/A + */ + virtual void visit(const LogicalAnd::Node &) = 0; + /** + * @brief Visit a LogicalNot node + * @param[in] node LogicalNot node to visit + * @return N/A + */ + virtual void visit(const LogicalNot::Node &) = 0; + /** + * @brief Visit a LogicalOr node + * @param[in] node LogicalOr node to visit + * @return N/A + */ + virtual void visit(const LogicalOr::Node &) = 0; }; } // namespace op diff --git a/runtimes/pure_arm_compute/src/internal/op/NotEqual.cc b/runtimes/pure_arm_compute/src/internal/op/NotEqual.cc new file mode 100644 index 000000000..2906e214b --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/NotEqual.cc @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/op/NotEqual.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace NotEqual +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace NotEqual +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace NotEqual +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 2 && outputCount == 1); + + output_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> input1 Tensor Index + // 1 -> input2 Tensor Index + input1_index = inputs[0]; + input2_index = inputs[1]; +} + +} // namespace NotEqual +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/NotEqual.h b/runtimes/pure_arm_compute/src/internal/op/NotEqual.h new file mode 100644 index 000000000..0d6130948 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/NotEqual.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __INTERNAL_OP_NOT_EQUAL_H__ +#define __INTERNAL_OP_NOT_EQUAL_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace NotEqual +{ + +struct Param +{ + int32_t output_index; + + int32_t input1_index; + int32_t input2_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +} // namespace NotEqual +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace NotEqual +{ +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace NotEqual +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_NOT_EQUAL_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/PReLU.cc b/runtimes/pure_arm_compute/src/internal/op/PReLU.cc new file mode 100644 index 000000000..25b06505b --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/PReLU.cc @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "internal/op/PReLU.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace PReLU +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace PReLU +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace PReLU +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 2 && outputCount == 1); + + ofm_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> input Tensor Index + // 1 -> alpha Tensor Index + ifm_index = inputs[0]; + alpha_index = inputs[1]; +} + +} // namespace PReLU +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/PReLU.h b/runtimes/pure_arm_compute/src/internal/op/PReLU.h new file mode 100644 index 000000000..ae754abb4 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/PReLU.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file ReLU.h + * @brief This file contains accept function and params for ReLU operation + * @ingroup COM_AI_RUNTIME + */ + +#ifndef __INTERNAL_OP_PRELU_H__ +#define __INTERNAL_OP_PRELU_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace PReLU +{ + +/** + * @brief Struct of PReLU operation's param + */ +struct Param +{ + int32_t ofm_index; /**< Output format index */ + + int32_t ifm_index; /**< Input format index */ + int32_t alpha_index; /**< Alpha input index */ + + /** + * @brief Construct a new Param object for ReLU as default + */ + Param() = default; + + /** + * @brief Construct a new Param object for PReLU with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +/** + * @brief Class to define operation node for PReLU + */ +class Node final : public op::Node +{ +public: + /** + * @brief Construct a new Node object for PReLU with param + * @param [in] param Parameters for Node + */ + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + /** + * @brief Destroy the Node object for PReLU + */ + virtual ~Node() = default; + +public: + /** + * @brief Get parameters for PReLU + * @return Parameters of PReLU + */ + const Param ¶m(void) const { return _param; } + +public: + /** + * @brief Function for accepting node for PReLU + * @param [in] v Node visitor for invoking visit function of PReLU + * @return N/A + */ + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace PReLU +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_PRELU_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/Pack.cc 
b/runtimes/pure_arm_compute/src/internal/op/Pack.cc new file mode 100644 index 000000000..73f89b840 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/Pack.cc @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/op/Pack.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Pack +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace Pack +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Pack +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(outputCount == 1); + + // Each input should be interpreted as follows: + // + // 0 .. 
n - 3 -> Input Tensor Index + // n - 2 -> Input Tensor counts (will be ignored) + // n - 1 -> Input Axis Index + ofm_index = outputs[0]; + axis_index = inputs[inputCount - 1]; + // last input is axis along which packing is required + for (uint32_t n = 0; n < inputCount - 2; ++n) + { + ifm_indexes.emplace_back(inputs[n]); + } +} + +} // namespace Pack +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/Pack.h b/runtimes/pure_arm_compute/src/internal/op/Pack.h new file mode 100644 index 000000000..c5de01bd8 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/Pack.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __INTERNAL_OP_PACK_H__ +#define __INTERNAL_OP_PACK_H__ + +#include "internal/op/Node.h" +#include <vector> + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Pack +{ + +struct Param +{ + int32_t ofm_index; + // There are N+1 inputs, 0 to N-1 are tensors of same shape + // Nth input is axis index along which stack is needed to be done. 
+ std::vector<int32_t> ifm_indexes; + int32_t axis_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace Pack +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_PACK_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/Pad.cc b/runtimes/pure_arm_compute/src/internal/op/Pad.cc index 24d08bf36..00938242b 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Pad.cc +++ b/runtimes/pure_arm_compute/src/internal/op/Pad.cc @@ -1,63 +1,63 @@ -/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "internal/op/Pad.h"
-#include "internal/op/NodeVisitor.h"
-
-#include <cassert>
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace Pad
-{
-
-void Node::accept(NodeVisitor &&v) const { v.visit(*this); }
-
-} // namespace Pad
-} // namespace op
-} // namespace tflite
-} // namespace internal
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace Pad
-{
-
-Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount,
- const uint32_t *outputs)
-{
- assert(inputCount == 2 && outputCount == 1);
- ofm_index = outputs[0];
-
- // Each input should be interpreted as follows:
- //
- // 0 -> input Tensor Index
- // 1 -> paddings
- ifm_index = inputs[0];
- paddings_index = inputs[1];
-}
-} // namespace Pad
-} // namespace op
-} // namespace tflite
-} // namespace internal
+/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/op/Pad.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Pad +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace Pad +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Pad +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 2 && outputCount == 1); + ofm_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> input Tensor Index + // 1 -> paddings + ifm_index = inputs[0]; + paddings_index = inputs[1]; +} +} // namespace Pad +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/Pad.h b/runtimes/pure_arm_compute/src/internal/op/Pad.h index e3ddae44c..68752a10e 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Pad.h +++ b/runtimes/pure_arm_compute/src/internal/op/Pad.h @@ -1,69 +1,107 @@ -/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __INTERNAL_OP_PAD_H__
-#define __INTERNAL_OP_PAD_H__
-
-#include "internal/op/Node.h"
-
-#include <cstdint>
-
-namespace internal
-{
-namespace tflite
-{
-namespace op
-{
-namespace Pad
-{
-
-struct Param
-{
- int32_t ifm_index;
- int32_t paddings_index;
- int32_t ofm_index;
-
- Param() = default;
- Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs);
-};
-
-class Node final : public op::Node
-{
-public:
- Node(const Param ¶m) : _param(param)
- {
- // DO NOTHING
- }
-
-public:
- virtual ~Node() = default;
-
-public:
- const Param ¶m(void) const { return _param; }
-
-public:
- void accept(NodeVisitor &&) const override;
-
-private:
- const Param _param;
-};
-
-} // namespace Pad
-} // namespace op
-} // namespace tflite
-} // namespace internal
-
-#endif // __INTERNAL_OP_PAD_H_
+/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file Pad.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines Pad node + */ + +#ifndef __INTERNAL_OP_PAD_H__ +#define __INTERNAL_OP_PAD_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Pad +{ + +/** + * @brief Struct to manipulate parameter for Pad operation + */ +struct Param +{ + int32_t ifm_index; //!< index for input + int32_t paddings_index; //!< index for padding + int32_t ofm_index; //!< index for output + + /** + * @brief Default Constructor + */ + Param() = default; + /** + * @brief Construct a new Param object + * @param[in] inputCount the number of inputs + * @param[in] inputs pointer for input data + * @param[in] outputCount the number of outputs + * @param[in] outputs pointer for input data + */ + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +/** + * @brief Class to define Pad Operation + */ +class Node final : public op::Node +{ +public: + /** + * @brief Construct a new RNN Node object + * @param param Parameter for RNN Node + */ + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + /** + * @brief Default Destructor + */ + virtual ~Node() = default; + +public: + /** + * @brief Get parameter + * @return Param reference + */ + const 
Param ¶m(void) const { return _param; } + +public: + /** + * @brief Accept a NodeVisitor so that it can visit this node + * @param [in] v Visitor + * @return N/A + */ + void accept(NodeVisitor &&) const override; + +private: + const Param _param; //!< parameter for Pad node +}; + +} // namespace Pad +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_PAD_H_ diff --git a/runtimes/pure_arm_compute/src/internal/op/RSQRT.h b/runtimes/pure_arm_compute/src/internal/op/RSQRT.h index e384b27f2..e39d60241 100644 --- a/runtimes/pure_arm_compute/src/internal/op/RSQRT.h +++ b/runtimes/pure_arm_compute/src/internal/op/RSQRT.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file RSQRT.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::RSQRT::Param struct + * and internal::tflite::op::RSQRT::Node class + */ #ifndef __INTERNAL_OP_RSQRT_H__ #define __INTERNAL_OP_RSQRT_H__ @@ -30,31 +36,61 @@ namespace op namespace RSQRT { +/** + * @brief Struct to have indexes for operation parameter + */ struct Param { - int32_t output_index; - - int32_t input_index; + int32_t output_index; /**< Index of output feature map */ + int32_t input_index; /**< Index of input feature map */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an operation of data structure + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual 
~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/ReLU.h b/runtimes/pure_arm_compute/src/internal/op/ReLU.h index 64dcf2e14..aaa39b523 100644 --- a/runtimes/pure_arm_compute/src/internal/op/ReLU.h +++ b/runtimes/pure_arm_compute/src/internal/op/ReLU.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file ReLU.h + * @brief This file contains accept function and params for ReLU operation + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_RELU_H__ #define __INTERNAL_OP_RELU_H__ @@ -30,31 +36,64 @@ namespace op namespace ReLU { +/** + * @brief Struct of ReLU operation's param + */ struct Param { - int32_t ofm_index; + int32_t ofm_index; /**< Output format index */ - int32_t ifm_index; + int32_t ifm_index; /**< Input format index */ + /** + * @brief Construct a new Param object for ReLU as default + */ Param() = default; + + /** + * @brief Construct a new Param object for ReLU with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for ReLU + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for ReLU with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy the Node object for ReLU + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for ReLU + * @return Parameters of ReLU + */ const Param ¶m(void) const { 
return _param; } public: + /** + * @brief Function for accepting node for ReLU + * @param [in] v Node visitor for invoking visit function of ReLU + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/ReLU1.h b/runtimes/pure_arm_compute/src/internal/op/ReLU1.h index 997a9faff..330445af8 100644 --- a/runtimes/pure_arm_compute/src/internal/op/ReLU1.h +++ b/runtimes/pure_arm_compute/src/internal/op/ReLU1.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file ReLU1.h + * @brief This file contains accept function and params for ReLU1 operation + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_RELU1_H__ #define __INTERNAL_OP_RELU1_H__ @@ -30,31 +36,64 @@ namespace op namespace ReLU1 { +/** + * @brief Struct of ReLU1 operation's param + */ struct Param { - int32_t ofm_index; + int32_t ofm_index; /**< Output format index */ - int32_t ifm_index; + int32_t ifm_index; /**< Input format index */ + /** + * @brief Construct a new Param object for ReLU1 as default + */ Param() = default; + + /** + * @brief Construct a new Param object for ReLU1 with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for ReLU1 + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for ReLU1 with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy the Node object for ReLU1 + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for ReLU1 + * @return Parameters of ReLU1 + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Function for 
accepting node for ReLU1 + * @param [in] v Node visitor for invoking visit function of ReLU1 + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/ReLU6.h b/runtimes/pure_arm_compute/src/internal/op/ReLU6.h index 77c55b64c..6fc2c24fe 100644 --- a/runtimes/pure_arm_compute/src/internal/op/ReLU6.h +++ b/runtimes/pure_arm_compute/src/internal/op/ReLU6.h @@ -14,6 +14,11 @@ * limitations under the License. */ +/** + * @file ReLU6.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::ReLU6 class + */ #ifndef __INTERNAL_OP_RELU6_H__ #define __INTERNAL_OP_RELU6_H__ @@ -30,31 +35,61 @@ namespace op namespace ReLU6 { +/** + * @brief Struct to have indexes for operation parameter + */ struct Param { - int32_t ofm_index; - - int32_t ifm_index; + int32_t ofm_index; /** Index of output feature map */ + int32_t ifm_index; /** Index of input feature map */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an operation of data structure + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff 
--git a/runtimes/pure_arm_compute/src/internal/op/ReduceMax.h b/runtimes/pure_arm_compute/src/internal/op/ReduceMax.h index e3278aacf..77d8bd869 100644 --- a/runtimes/pure_arm_compute/src/internal/op/ReduceMax.h +++ b/runtimes/pure_arm_compute/src/internal/op/ReduceMax.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file ReduceMax.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::ReduceMax::Param struct + * and internal::tflite::op::ReduceMax::Node class + */ #ifndef __INTERNAL_OP_REDUCEMAX_H__ #define __INTERNAL_OP_REDUCEMAX_H__ @@ -30,32 +36,63 @@ namespace op namespace ReduceMax { +/** + * @brief Struct to have indexes for operation parameter + */ struct Param { - int32_t ofm_index; - - int32_t ifm_index; - int32_t axis_index; + int32_t ofm_index; /**< Index of output feature map */ + int32_t ifm_index; /**< Index of input feature map */ + int32_t axis_index; /**< Index of axis */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an operation of data structure + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @param[in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; 
private: diff --git a/runtimes/pure_arm_compute/src/internal/op/ReduceMin.cc b/runtimes/pure_arm_compute/src/internal/op/ReduceMin.cc new file mode 100644 index 000000000..72b6079d4 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/ReduceMin.cc @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/op/ReduceMin.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace ReduceMin +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace ReduceMin +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace ReduceMin +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 2 && outputCount == 1); + + ofm_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + // 1 -> Axis Tensor Index + ifm_index = inputs[0]; + axis_index = inputs[1]; +} + +} // namespace ReduceMin +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/ReduceMin.h b/runtimes/pure_arm_compute/src/internal/op/ReduceMin.h new file mode 100644 index 000000000..5dd82ec43 --- /dev/null +++ 
b/runtimes/pure_arm_compute/src/internal/op/ReduceMin.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file ReduceMin.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::ReduceMin::Param struct + * and internal::tflite::op::ReduceMin::Node class + */ +#ifndef __INTERNAL_OP_REDUCEMIN_H__ +#define __INTERNAL_OP_REDUCEMIN_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace ReduceMin +{ + +/** + * @brief Struct to have indexes for operation parameter + */ +struct Param +{ + int32_t ofm_index; /**< Index of output feature map */ + + int32_t ifm_index; /**< Index of input feature map */ + int32_t axis_index; /**< Index of axis */ + /** + * @brief Construct as default + */ + Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +/** + * @brief Class to represent an operation of data structure + */ +class Node final : public op::Node +{ +public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a 
Node + */ + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + /** + * @brief Destruct as default + */ + virtual ~Node() = default; + +public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ + const Param ¶m(void) const { return _param; } + +public: + /** + * @brief Visit this Node by NodeVisitor + * @param[in] v Visitor + * @return N/A + */ + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace ReduceMin +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_REDUCEMIN_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/ReduceSum.cc b/runtimes/pure_arm_compute/src/internal/op/ReduceSum.cc new file mode 100644 index 000000000..4d83c1734 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/ReduceSum.cc @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "internal/op/ReduceSum.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace ReduceSum +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace ReduceSum +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace ReduceSum +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 2 && outputCount == 1); + + ofm_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + // 1 -> Axis Tensor Index + ifm_index = inputs[0]; + axis_index = inputs[1]; +} + +} // namespace ReduceSum +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/ReduceSum.h b/runtimes/pure_arm_compute/src/internal/op/ReduceSum.h new file mode 100644 index 000000000..9c661f63a --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/ReduceSum.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __INTERNAL_OP_REDUCESUM_H__ +#define __INTERNAL_OP_REDUCESUM_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace ReduceSum +{ + +struct Param +{ + int32_t ofm_index; + + int32_t ifm_index; + int32_t axis_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace ReduceSum +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_REDUCESUM_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/Reshape.h b/runtimes/pure_arm_compute/src/internal/op/Reshape.h index ab77ade8c..7152eaece 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Reshape.h +++ b/runtimes/pure_arm_compute/src/internal/op/Reshape.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file Reshape.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines Reshape node + */ + #ifndef __INTERNAL_OP_RESHAPE_H__ #define __INTERNAL_OP_RESHAPE_H__ @@ -30,36 +36,68 @@ namespace op namespace Reshape { +/** + * @brief Struct to manipulate parameter for Reshape operation + */ struct Param { - int32_t output_index; + int32_t output_index; //!< index for output feature map - int32_t input_index; - int32_t shape_index; + int32_t input_index; //!< index for input feature map + int32_t shape_index; //!< index for shape + /** + * @brief Default Constructor + */ Param() = default; + /** + * @brief Construct a new Param object + * @param[in] inputCount the number of inputs + * @param[in] inputs pointer for input data + * @param[in] outputCount the number of outputs + * @param[in] outputs pointer for input data + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define Reshape Operation + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Reshape Node object + * @param param Parameter for Reshape Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Default Destructor + */ virtual ~Node() = default; public: + /** + * @brief Get parameter + * @return Param reference + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Accept a NodeVisitor so that it can visit this node + * @param [in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: - const Param _param; + const Param _param; //!< parameter for Reshape node }; } // namespace Reshape diff --git a/runtimes/pure_arm_compute/src/internal/op/ResizeBilinear.h b/runtimes/pure_arm_compute/src/internal/op/ResizeBilinear.h index bf216b75f..f2eab4aaf 100644 --- a/runtimes/pure_arm_compute/src/internal/op/ResizeBilinear.h +++ b/runtimes/pure_arm_compute/src/internal/op/ResizeBilinear.h @@ -14,6 +14,12 @@ * 
limitations under the License. */ +/** + * @file ResizeBilinear.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::ResizeBilinear::Param struct + * and internal::tflite::op::ResizeBilinear::Node class + */ #ifndef __INTERNAL_OP_RESIZE_BILINEAR_H__ #define __INTERNAL_OP_RESIZE_BILINEAR_H__ @@ -30,33 +36,64 @@ namespace op namespace ResizeBilinear { +/** + * @brief Struct to have indexes for ResizeBilinear operation parameter + */ struct Param { - int32_t ofm_index; - - int32_t ifm_index; - int32_t height_index; - int32_t width_index; + int32_t ofm_index; /**< Index of output feature map */ + int32_t ifm_index; /**< Index of input feature map */ + int32_t height_index; /**< Index of height */ + int32_t width_index; /**< Index of width */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an ResizeBilinear operation of data structure + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @param[in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/Rnn.h b/runtimes/pure_arm_compute/src/internal/op/Rnn.h index c436a0987..7b2a10843 100644 --- 
a/runtimes/pure_arm_compute/src/internal/op/Rnn.h +++ b/runtimes/pure_arm_compute/src/internal/op/Rnn.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file Rnn.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines RNN node + */ + #ifndef __INTERNAL_OP_RNN_H__ #define __INTERNAL_OP_RNN_H__ @@ -30,41 +36,73 @@ namespace op namespace RNN { +/** + * @brief Struct to manipulate parameter for RNN operation + */ struct Param { - int32_t output_index; - int32_t hidden_state_out_index; + int32_t output_index; //!< index for output + int32_t hidden_state_out_index; //!< index for hidden state output - int32_t input_index; - int32_t weights_index; - int32_t recurrent_weights_index; - int32_t bias_index; - int32_t hidden_state_in_index; - int32_t fused_activation_index; + int32_t input_index; //!< index for input + int32_t weights_index; //!< index for weight + int32_t recurrent_weights_index; //!< index for recurrent weights + int32_t bias_index; //!< index for bias + int32_t hidden_state_in_index; //!< index for hidden state input + int32_t fused_activation_index; //!< index for fused activation + /** + * @brief Default Constructor + */ Param() = default; + /** + * @brief Construct a new Param object + * @param[in] inputCount the number of inputs + * @param[in] inputs pointer for input data + * @param[in] outputCount the number of outputs + * @param[in] outputs pointer for output data + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define RNN Operation + */ class Node final : public op::Node { public: + /** + * @brief Construct a new RNN Node object + * @param param Parameter for RNN Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Default Destructor + */ virtual ~Node() = default; public: + /** + * @brief Get parameter + * @return Param reference + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Accept a 
NodeVisitor so that it can visit this node + * @param [in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: - const Param _param; + const Param _param; //!< parameter for RNN node }; } // namespace RNN diff --git a/runtimes/pure_arm_compute/src/internal/op/SQRT.cc b/runtimes/pure_arm_compute/src/internal/op/SQRT.cc new file mode 100644 index 000000000..70ce42e9c --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/SQRT.cc @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "internal/op/SQRT.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace SQRT +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace SQRT +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace SQRT +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 1 && outputCount == 1); + + output_index = outputs[0]; + + // Each input should be interpreted as follows: + // 0 -> input Tensor Index + input_index = inputs[0]; +} + +} // namespace SQRT +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/SQRT.h b/runtimes/pure_arm_compute/src/internal/op/SQRT.h new file mode 100644 index 000000000..85dfb97a7 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/SQRT.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file SQRT.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::SQRT::Param struct + * and internal::tflite::op::SQRT::Node class + */ +#ifndef __INTERNAL_OP_SQRT_H__ +#define __INTERNAL_OP_SQRT_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace SQRT +{ + +/** + * @brief Struct to have indexes for operation parameter + */ +struct Param +{ + int32_t output_index; /**< Index of output feature map */ + + int32_t input_index; /**< Index of input feature map */ + /** + * @brief Construct as default + */ + Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +/** + * @brief Class to represent an operation of data structure + */ +class Node final : public op::Node +{ +public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + /** + * @brief Destruct as default + */ + virtual ~Node() = default; + +public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ + const Param ¶m(void) const { return _param; } + +public: + /** + * @brief Visit this Node by NodeVisitor + * @return N/A + */ + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace SQRT +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_SQRT_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/Softmax.h b/runtimes/pure_arm_compute/src/internal/op/Softmax.h index 746f6b4e6..6e631af5f 100644 --- 
a/runtimes/pure_arm_compute/src/internal/op/Softmax.h +++ b/runtimes/pure_arm_compute/src/internal/op/Softmax.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file Softmax.h + * @brief This file contains accept function and params for Softmax operation + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_SOFTMAX_H__ #define __INTERNAL_OP_SOFTMAX_H__ @@ -30,32 +36,65 @@ namespace op namespace Softmax { +/** + * @brief Struct of Softmax operation's param + */ struct Param { - int32_t output_index; + int32_t output_index; /**< Output index */ - int32_t input_index; - int32_t scale_index; + int32_t input_index; /**< Input index */ + int32_t scale_index; /**< Scale index */ + /** + * @brief Construct a new Param object for Softmax as default + */ Param() = default; + + /** + * @brief Construct a new Param object for Softmax with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for Softmax + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for Softmax with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy the Node object for Softmax + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for Softmax + * @return Parameters of Softmax + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Function for accepting node for Softmax + * @param [in] v Node visitor for invoking visit function of Softmax + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/SpaceToBatchND.cc 
b/runtimes/pure_arm_compute/src/internal/op/SpaceToBatchND.cc new file mode 100644 index 000000000..9ab026cf4 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/SpaceToBatchND.cc @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/op/SpaceToBatchND.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace SpaceToBatchND +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace SpaceToBatchND +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace SpaceToBatchND +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 3 && outputCount == 1); + + output_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> Input Tensor Index + // 1 -> Block size Index + // 2 -> Padding size Index + input_index = inputs[0]; + block_size_index = inputs[1]; + padding_size_index = inputs[2]; +} + +} // namespace SpaceToBatchND +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/SpaceToBatchND.h b/runtimes/pure_arm_compute/src/internal/op/SpaceToBatchND.h new file mode 100644 index 
000000000..650d068f4 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/SpaceToBatchND.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __INTERNAL_OP_SPACETOBATCHND_H__ +#define __INTERNAL_OP_SPACETOBATCHND_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace SpaceToBatchND +{ + +struct Param +{ + int32_t output_index; + + int32_t input_index; + int32_t block_size_index; + int32_t padding_size_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace SpaceToBatchND +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_SPACETOBATCHND_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/SpaceToDepth.h b/runtimes/pure_arm_compute/src/internal/op/SpaceToDepth.h index 81bfe5246..2e624006a 100644 --- a/runtimes/pure_arm_compute/src/internal/op/SpaceToDepth.h +++ b/runtimes/pure_arm_compute/src/internal/op/SpaceToDepth.h @@ 
-14,6 +14,12 @@ * limitations under the License. */ +/** + * @file SpaceToDepth.h + * @brief This file contains accept function and params for SpaceToDepth operation + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_SPACETODEPTH_H__ #define __INTERNAL_OP_SPACETODEPTH_H__ @@ -30,32 +36,65 @@ namespace op namespace SpaceToDepth { +/** + * @brief Struct of SpaceToDepth operation's param + */ struct Param { - int32_t output_index; + int32_t output_index; /**< Output index */ - int32_t input_index; - int32_t block_size_index; + int32_t input_index; /**< Input index */ + int32_t block_size_index; /**< Block size index */ + /** + * @brief Construct a new Param object for SpaceToDepth as default + */ Param() = default; + + /** + * @brief Construct a new Param object for SpaceToDepth with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for SpaceToDepth + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for SpaceToDepth with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy the Node object for SpaceToDepth + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for SpaceToDepth + * @return Parameters of SpaceToDepth + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Function for accepting node for SpaceToDepth + * @param [in] v Node visitor for invoking visit function of SpaceToDepth + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/Split.h b/runtimes/pure_arm_compute/src/internal/op/Split.h index 8bea1000d..b2c6c2fd1 
100644 --- a/runtimes/pure_arm_compute/src/internal/op/Split.h +++ b/runtimes/pure_arm_compute/src/internal/op/Split.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file Split.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines Split node + */ + #ifndef __INTERNAL_OP_SPLIT_H__ #define __INTERNAL_OP_SPLIT_H__ @@ -31,36 +37,68 @@ namespace op namespace Split { +/** + * @brief Struct to manipulate parameter for Split operation + */ struct Param { - int32_t axis_index; - int32_t ifm_index; + int32_t axis_index; //!< index for axis + int32_t ifm_index; //!< index for input feature map - std::vector<int32_t> ofm_indexes; + std::vector<int32_t> ofm_indexes; //!< index for output feature map + /** + * @brief Default Constructor + */ Param() = default; + /** + * @brief Construct a new Param object + * @param[in] inputCount the number of inputs + * @param[in] inputs pointer for input data + * @param[in] outputCount the number of outputs + * @param[in] outputs pointer for input data + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define Split Operation + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Split Node object + * @param param Parameter for Split Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Default Destructor + */ virtual ~Node() = default; public: + /** + * @brief Get parameter + * @return Param reference + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Accept a NodeVisitor so that it can visit this node + * @param [in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: - const Param _param; + const Param _param; //!< parameter for Split node }; } // namespace Split diff --git a/runtimes/pure_arm_compute/src/internal/op/SquaredDifference.cc b/runtimes/pure_arm_compute/src/internal/op/SquaredDifference.cc index 
c2c4f7242..f6c8bc5df 100644 --- a/runtimes/pure_arm_compute/src/internal/op/SquaredDifference.cc +++ b/runtimes/pure_arm_compute/src/internal/op/SquaredDifference.cc @@ -1,3 +1,19 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + #include "internal/op/SquaredDifference.h" #include "internal/op/NodeVisitor.h" @@ -30,7 +46,7 @@ namespace SquaredDifference Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs) { - assert(inputCount == 3 && outputCount == 1); + assert(inputCount == 2 && outputCount == 1); ofm_index = outputs[0]; @@ -38,10 +54,8 @@ Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, // // 0 -> LHS Tensor Index // 1 -> RHS Tensor Index - // 2 -> Activation Index lhs_index = inputs[0]; rhs_index = inputs[1]; - activation_index = inputs[2]; } } // namespace SquaredDifference diff --git a/runtimes/pure_arm_compute/src/internal/op/SquaredDifference.h b/runtimes/pure_arm_compute/src/internal/op/SquaredDifference.h index 7760405b9..ecbb03209 100644 --- a/runtimes/pure_arm_compute/src/internal/op/SquaredDifference.h +++ b/runtimes/pure_arm_compute/src/internal/op/SquaredDifference.h @@ -1,3 +1,25 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file SquaredDifference.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::SquaredDifference::Param struct + * and internal::tflite::op::SquaredDifference::Node class + */ #ifndef __INTERNAL_OP_SQUAREDDIFFERENCE_H__ #define __INTERNAL_OP_SQUAREDDIFFERENCE_H__ @@ -14,33 +36,62 @@ namespace op namespace SquaredDifference { +/** + * @brief Struct to have indexes for operation parameter + */ struct Param { - int32_t ofm_index; - - int32_t lhs_index; - int32_t rhs_index; - int32_t activation_index; + int32_t ofm_index; /**< Index of output feature map */ + int32_t lhs_index; /**< Index of lhs */ + int32_t rhs_index; /**< Index of rhs */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an operation of data structure + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default 
+ */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/Squeeze.h b/runtimes/pure_arm_compute/src/internal/op/Squeeze.h index e871067f5..d5f36f85f 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Squeeze.h +++ b/runtimes/pure_arm_compute/src/internal/op/Squeeze.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file Squeeze.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines internal::tflite::op::Squeeze::Param struct + * and internal::tflite::op::Squeeze::Node class + */ #ifndef __INTERNAL_OP_SQUEEZE_H__ #define __INTERNAL_OP_SQUEEZE_H__ @@ -30,32 +36,64 @@ namespace op namespace Squeeze { +/** + * @brief Struct to have indexes for operation parameter + */ struct Param { - int32_t output_index; - - int32_t input_index; - int32_t dims_index_optional = -1; // optional param. default is -1 + int32_t output_index; /**< Index of output feature map */ + int32_t input_index; /**< Index of input feature map */ + // optional param. 
default is -1 + int32_t dims_index_optional = -1; /**< Index of dims */ + /** + * @brief Construct as default + */ Param() = default; + /** + * @brief Construct a new Param object with params + * @param[in] inputCount Count of inputs + * @param[in] inputs Pointer of inputs + * @param[in] outputCount Count of outputs + * @param[in] outputs Pointer of outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to represent an operation of data structure + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object with param + * @param[in] param Param object that makes up a Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destruct as default + */ virtual ~Node() = default; public: + /** + * @brief Get a reference of Param object + * @return Reference of Param object + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Visit this Node by NodeVisitor + * @param[in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/StridedSlice.h b/runtimes/pure_arm_compute/src/internal/op/StridedSlice.h index 26bb81a8c..21dbb9e68 100644 --- a/runtimes/pure_arm_compute/src/internal/op/StridedSlice.h +++ b/runtimes/pure_arm_compute/src/internal/op/StridedSlice.h @@ -14,6 +14,12 @@ * limitations under the License. 
*/ +/** + * @file StridedSlice.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines StridedSlice node + */ + #ifndef __INTERNAL_OP_STRIDEDSLICE_H__ #define __INTERNAL_OP_STRIDEDSLICE_H__ @@ -30,41 +36,73 @@ namespace op namespace StridedSlice { +/** + * @brief Struct to manipulate parameter for StridedSlice operation + */ struct Param { - int32_t outputData_index; + int32_t outputData_index; //!< index for output data - int32_t inputData_index; - int32_t startData_index; - int32_t endData_index; - int32_t stridesData_index; - int32_t beginMask_index; - int32_t endMask_index; - int32_t shrinkAxisMask_index; + int32_t inputData_index; //!< index for input data + int32_t startData_index; //!< index where slicing start from + int32_t endData_index; //!< index where slicing ends to + int32_t stridesData_index; //!< index for stride value + int32_t beginMask_index; //!< index for beginmask + int32_t endMask_index; //!< index for endmask + int32_t shrinkAxisMask_index; //!< index for shrink axis + /** + * @brief Default Constructor + */ Param() = default; + /** + * @brief Construct a new Param object + * @param[in] inputCount the number of inputs + * @param[in] inputs pointer for input data + * @param[in] outputCount the number of outputs + * @param[in] outputs pointer for output data + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define StridedSlice Operation + */ class Node final : public op::Node { public: + /** + * @brief Construct a new StridedSlice Node object + * @param param Parameter for StridedSlice Node + */ Node(const Param &param) : _param(param) { // DO NOTHING } public: + /** + * @brief Default Destructor + */ virtual ~Node() = default; public: + /** + * @brief Get parameter + * @return Param reference + */ const Param &param(void) const { return _param; } public: + /** + * @brief Accept a NodeVisitor so that it can visit this node + * @param [in] v Visitor + * @return N/A + 
*/ void accept(NodeVisitor &&) const override; private: - const Param _param; + const Param _param; //!< parameter for StridedSlice node }; } // namespace StridedSlice diff --git a/runtimes/pure_arm_compute/src/internal/op/Sub.h b/runtimes/pure_arm_compute/src/internal/op/Sub.h index 3da271029..864359d1e 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Sub.h +++ b/runtimes/pure_arm_compute/src/internal/op/Sub.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file Sub.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines SUB Node + */ + #ifndef __INTERNAL_OP_SUB_H__ #define __INTERNAL_OP_SUB_H__ @@ -30,37 +36,69 @@ namespace op namespace Sub { +/** + * @brief Struct to manipulate parameters for SUB + */ struct Param { - int32_t ofm_index; + int32_t ofm_index; //!< index for output feature map - int32_t lhs_index; - int32_t rhs_index; - int32_t activation_index; + int32_t lhs_index; //!< index for left-hand side + int32_t rhs_index; //!< index for right-hand side + int32_t activation_index; //!< index for activation function + /** + * @brief Default Constructor + */ Param() = default; + /** + * @brief Construct a new Param object + * @param[in] inputCount the number of inputs + * @param[in] inputs pointer for input data + * @param[in] outputCount the number of outputs + * @param[in] outputs pointer for output data + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define SUB Operation + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Sub Node object + * @param param Parameter for Sub Node + */ Node(const Param &param) : _param(param) { // DO NOTHING } public: + /** + * @brief Default Destructor + */ virtual ~Node() = default; public: + /** + * @brief Get parameter + * @return Param reference + */ const Param &param(void) const { return _param; } public: + /** + * @brief Accept a NodeVisitor so that it can visit this node + * @param [in] 
v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: - const Param _param; + const Param _param; //!< parameter for SUB node }; } // namespace Sub diff --git a/runtimes/pure_arm_compute/src/internal/op/Tanh.h b/runtimes/pure_arm_compute/src/internal/op/Tanh.h index f5a9f102e..fd87297f1 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Tanh.h +++ b/runtimes/pure_arm_compute/src/internal/op/Tanh.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file Tanh.h + * @ingroup COM_AI_RUNTIME + * @brief This file defines TANH node + */ + #ifndef __INTERNAL_OP_TANH_H__ #define __INTERNAL_OP_TANH_H__ @@ -30,35 +36,67 @@ namespace op namespace Tanh { +/** + * @brief Struct to manipulate parameter for hyperbolic tangent operation + */ struct Param { - int32_t ofm_index; + int32_t ofm_index; //!< index for output feature map - int32_t ifm_index; + int32_t ifm_index; //!< index for input feature map + /** + * @brief Default Constructor + */ Param() = default; + /** + * @brief Construct a new Param object + * @param[in] inputCount the number of inputs + * @param[in] inputs pointer for input data + * @param[in] outputCount the number of outputs + * @param[in] outputs pointer for output data + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define Tanh Operation + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Tanh Node object + * @param param Parameter for Tanh Node + */ Node(const Param &param) : _param(param) { // DO NOTHING } public: + /** + * @brief Default Destructor + */ virtual ~Node() = default; public: + /** + * @brief Get parameter + * @return Param reference + */ const Param &param(void) const { return _param; } public: + /** + * @brief Accept a NodeVisitor so that it can visit this node + * @param [in] v Visitor + * @return N/A + */ void accept(NodeVisitor &&) const override; private: - const Param _param; + const 
Param _param; //!< parameter for Tanh node }; } // namespace Tanh diff --git a/runtimes/pure_arm_compute/src/internal/op/TopKV2.h b/runtimes/pure_arm_compute/src/internal/op/TopKV2.h index 79bbd1f2e..02b7827e9 100644 --- a/runtimes/pure_arm_compute/src/internal/op/TopKV2.h +++ b/runtimes/pure_arm_compute/src/internal/op/TopKV2.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file TopKV2.h + * @brief This file contains accept function and params for TopKV2 operation + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_TOPKV2_H__ #define __INTERNAL_OP_TOPKV2_H__ @@ -30,33 +36,66 @@ namespace op namespace TopKV2 { +/** + * @brief Struct of TopKV2 operation's param + */ struct Param { - int32_t outputValues_index; - int32_t outputIndices_index; + int32_t outputValues_index; /**< Output values index */ + int32_t outputIndices_index; /**< Output indices index */ - int32_t inputData_index; - int32_t k_index; + int32_t inputData_index; /**< Input data index */ + int32_t k_index; /**< K value index */ + /** + * @brief Construct a new Param object for TopKV2 as default + */ Param() = default; + + /** + * @brief Construct a new Param object for TopKV2 with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for TopKV2 + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for TopKV2 with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy the Node object for TopKV2 + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for TopKV2 + * @return Parameters of TopKV2 + */ const Param ¶m(void) const { return _param; } 
public: + /** + * @brief Function for accepting node for TopKV2 + * @param [in] v Node visitor for invoking visit function of TopKV2 + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/Transpose.h b/runtimes/pure_arm_compute/src/internal/op/Transpose.h index dac2ef8f2..bb01bf322 100644 --- a/runtimes/pure_arm_compute/src/internal/op/Transpose.h +++ b/runtimes/pure_arm_compute/src/internal/op/Transpose.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file Transpose.h + * @brief This file contains accept function and params for Transpose operation + * @ingroup COM_AI_RUNTIME + */ + #ifndef __INTERNAL_OP_TRANSPOSE_H__ #define __INTERNAL_OP_TRANSPOSE_H__ @@ -30,32 +36,65 @@ namespace op namespace Transpose { +/** + * @brief Struct of Transpose operation's param + */ struct Param { - int32_t ofm_index; + int32_t ofm_index; /**< Output format index */ - int32_t ifm_index; - int32_t permu_index; + int32_t ifm_index; /**< Input format index */ + int32_t permu_index; /**< Permutation index */ + /** + * @brief Construct a new Param object for Transpose as default + */ Param() = default; + + /** + * @brief Construct a new Param object for Transpose with params + * @param [in] inputCount The number of input + * @param [in] inputs Array containing inputs + * @param [in] outputCount The number of output + * @param [in] outputs Array containing outputs + */ Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); }; +/** + * @brief Class to define operation node for Transpose + */ class Node final : public op::Node { public: + /** + * @brief Construct a new Node object for Transpose with param + * @param [in] param Parameters for Node + */ Node(const Param ¶m) : _param(param) { // DO NOTHING } public: + /** + * @brief Destroy the Node object for Transpose + */ virtual ~Node() = default; public: + /** + * @brief Get parameters for Transpose + * 
@return Parameters of Transpose + */ const Param ¶m(void) const { return _param; } public: + /** + * @brief Function for accepting node for Transpose + * @param [in] v Node visitor for invoking visit function of Transpose + * @return N/A + */ void accept(NodeVisitor &&) const override; private: diff --git a/runtimes/pure_arm_compute/src/internal/op/TransposeConv.cc b/runtimes/pure_arm_compute/src/internal/op/TransposeConv.cc new file mode 100644 index 000000000..502eff525 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/TransposeConv.cc @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "internal/op/TransposeConv.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace TransposeConv +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace TransposeConv +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace TransposeConv +{ + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 6 && outputCount == 1); + + ofm_index = outputs[0]; + + // Each input should be interpreted as follows: + // + // 0 -> Output Shape Index + // 1 -> Weights Index + // 2 -> Input Tensor Index + // 3 -> Padding Type + // 4 -> Stride width + // 5 -> Stride height + + op_shape_index = inputs[0]; + ker_index = inputs[1]; + ifm_index = inputs[2]; + padding_index = inputs[3]; + hstride_index = inputs[4]; + vstride_index = inputs[5]; +} + +} // namespace TransposeConv +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/TransposeConv.h b/runtimes/pure_arm_compute/src/internal/op/TransposeConv.h new file mode 100644 index 000000000..b0122f82d --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/TransposeConv.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __INTERNAL_OP_TRANSPOSECONV_H__ +#define __INTERNAL_OP_TRANSPOSECONV_H__ + +#include "internal/op/Node.h" + +#include <cstdint> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace TransposeConv +{ + +struct Param +{ + int32_t ofm_index; + + int32_t op_shape_index; + int32_t ker_index; + int32_t ifm_index; + int32_t padding_index; + int32_t hstride_index; + int32_t vstride_index; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace TransposeConv +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_TRANSPOSECONV_H__ diff --git a/runtimes/pure_arm_compute/src/internal/op/Unpack.cc b/runtimes/pure_arm_compute/src/internal/op/Unpack.cc new file mode 100644 index 000000000..a1be0280c --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/Unpack.cc @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "internal/op/Unpack.h" +#include "internal/op/NodeVisitor.h" + +#include <cassert> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Unpack +{ + +void Node::accept(NodeVisitor &&v) const { v.visit(*this); } + +} // namespace Unpack +} // namespace op +} // namespace tflite +} // namespace internal + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Unpack +{ +// There are three inputs: tensor which is to be unpacked, +// axis along which tensor needs to be unpacked +// and number of splits along the axis. + +Param::Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) +{ + assert(inputCount == 3); + + ifm_index = inputs[0]; + + for (uint32_t n = 0; n < outputCount; ++n) + { + ofm_indexes.emplace_back(outputs[n]); + } + num_split_index = inputs[1]; + axis_index = inputs[2]; +} + +} // namespace Unpack +} // namespace op +} // namespace tflite +} // namespace internal diff --git a/runtimes/pure_arm_compute/src/internal/op/Unpack.h b/runtimes/pure_arm_compute/src/internal/op/Unpack.h new file mode 100644 index 000000000..575e3d024 --- /dev/null +++ b/runtimes/pure_arm_compute/src/internal/op/Unpack.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __INTERNAL_OP_UNPACK_H__ +#define __INTERNAL_OP_UNPACK_H__ + +#include "internal/op/Node.h" + +#include <cstdint> +#include <vector> + +namespace internal +{ +namespace tflite +{ +namespace op +{ +namespace Unpack +{ + +struct Param +{ + int32_t ifm_index; + int32_t axis_index; + int32_t num_split_index; + // There are N outputs after Unpacking Input Tensor along axis + std::vector<int32_t> ofm_indexes; + + Param() = default; + Param(uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, const uint32_t *outputs); +}; + +class Node final : public op::Node +{ +public: + Node(const Param ¶m) : _param(param) + { + // DO NOTHING + } + +public: + virtual ~Node() = default; + +public: + const Param ¶m(void) const { return _param; } + +public: + void accept(NodeVisitor &&) const override; + +private: + const Param _param; +}; + +} // namespace Unpack +} // namespace op +} // namespace tflite +} // namespace internal + +#endif // __INTERNAL_OP_UNPACK_H__ diff --git a/runtimes/pure_arm_compute/src/logging.h b/runtimes/pure_arm_compute/src/logging.h index 61b434eda..914b63057 100644 --- a/runtimes/pure_arm_compute/src/logging.h +++ b/runtimes/pure_arm_compute/src/logging.h @@ -14,6 +14,12 @@ * limitations under the License. */ +/** + * @file logging.h + * @brief This file contains Context class for logging. 
+ * @ingroup COM_AI_RUNTIME + */ + #ifndef __PURE_ARM_COMPUTE_LOGGING_H__ #define __PURE_ARM_COMPUTE_LOGGING_H__ @@ -22,9 +28,15 @@ namespace logging { +/** + * @brief class to define Context for logging + */ class Context { public: + /** + * @brief Construct default + */ Context() : _enabled{false} { auto env = std::getenv("PURE_ARM_COMPUTE_LOG_ENABLE"); @@ -36,12 +48,21 @@ public: } public: + /** + * @brief Get @c true if PURE_ARM_COMPUTE_LOG_ENABLE has been set as environment value, otherwise + * @c false + * @return @c true if PURE_ARM_COMPUTE_LOG_ENABLE has been set as environment value, otherwise @c + * false + */ bool enabled(void) const { return _enabled; } private: bool _enabled; }; +/** + * @brief static Context class for logging + */ static Context ctx; } // namespace logging diff --git a/runtimes/pure_arm_compute/src/memory.cc b/runtimes/pure_arm_compute/src/memory.cc index 2a9294f86..9e999661a 100644 --- a/runtimes/pure_arm_compute/src/memory.cc +++ b/runtimes/pure_arm_compute/src/memory.cc @@ -18,7 +18,7 @@ #include <sys/mman.h> #include <memory> -#include "nnfw/std/memory.h" +#include "cpp14/memory.h" #include "memory.h" int ANeuralNetworksMemory_createFromFd(size_t size, int protect, int fd, size_t offset, @@ -31,7 +31,7 @@ int ANeuralNetworksMemory_createFromFd(size_t size, int protect, int fd, size_t // Use unique pointer to avoid memory leak std::unique_ptr<ANeuralNetworksMemory> memory_ptr = - nnfw::make_unique<ANeuralNetworksMemory>(size, protect, fd, offset); + nnfw::cpp14::make_unique<ANeuralNetworksMemory>(size, protect, fd, offset); if (memory_ptr == nullptr) { return ANEURALNETWORKS_OUT_OF_MEMORY; diff --git a/runtimes/pure_arm_compute/src/memory.h b/runtimes/pure_arm_compute/src/memory.h index 8bd43b0d6..ffac26ef6 100644 --- a/runtimes/pure_arm_compute/src/memory.h +++ b/runtimes/pure_arm_compute/src/memory.h @@ -14,20 +14,51 @@ * limitations under the License. 
*/ +/** + * @file memory.h + * @brief This file defines ANeuralNetworksMemory class for handling Memory NNAPI + * @ingroup COM_AI_RUNTIME + */ + #ifndef __MEMORY_H__ #define __MEMORY_H__ #include <cstdint> +/** + * @brief struct to define Memory NNAPI + */ struct ANeuralNetworksMemory { public: + /** + * @brief Constructor with params + * @param [in] size The requested size in bytes + * @param [in] protect The desired memory protection for the mapping + * @param [in] fd The requested file descriptor + * @param [in] offset The offset to the beginning of the file of the area to map + */ ANeuralNetworksMemory(size_t size, int protect, int fd, size_t offset); + /** + * @brief Destructor default + */ ~ANeuralNetworksMemory(); public: + /** + * @brief Get size + * @return size + */ size_t size(void) const { return _size; } + /** + * @brief Get base pointer + * @return base pointer + */ uint8_t *base(void) { return _base; } + /** + * @brief Get base pointer + * @return const base pointer + */ const uint8_t *base(void) const { return _base; } private: diff --git a/runtimes/pure_arm_compute/src/model.cc b/runtimes/pure_arm_compute/src/model.cc index 49ea59f17..2c4120d7a 100644 --- a/runtimes/pure_arm_compute/src/model.cc +++ b/runtimes/pure_arm_compute/src/model.cc @@ -602,6 +602,28 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model, break; } + case ANEURALNETWORKS_SPACE_TO_BATCH_ND: + { + using internal::tflite::op::SpaceToBatchND::Param; + using internal::tflite::op::SpaceToBatchND::Node; + + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } + case ANEURALNETWORKS_BATCH_TO_SPACE_ND: + { + using internal::tflite::op::BatchToSpaceNd::Param; + using internal::tflite::op::BatchToSpaceNd::Node; + + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } case 
ANEURALNETWORKS_L2_POOL_2D: { // Input count is 7 for Implicit Padding @@ -675,6 +697,29 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel *model, break; } + case ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION: + { + + using internal::tflite::op::LocalResponseNormalization::Param; + using internal::tflite::op::LocalResponseNormalization::Node; + + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } + case ANEURALNETWORKS_DEPTH_TO_SPACE: + { + using internal::tflite::op::DepthToSpace::Param; + using internal::tflite::op::DepthToSpace::Node; + + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } default: throw std::runtime_error{"Not supported operation"}; }; @@ -706,6 +751,18 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model, break; } + case ANEURALNETWORKS_REDUCE_MIN_EX: + { + using internal::tflite::op::ReduceMin::Param; + using internal::tflite::op::ReduceMin::Node; + + // Add 'operations' + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } case ANEURALNETWORKS_TENSORFLOW_MAX_EX: { using internal::tflite::op::ReduceMax::Param; @@ -718,6 +775,53 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model, break; } + case ANEURALNETWORKS_PRELU_EX: + { + using internal::tflite::op::PReLU::Param; + using internal::tflite::op::PReLU::Node; + + // Add 'operations' + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } + case ANEURALNETWORKS_TRANSPOSE_CONV_EX: + { + using internal::tflite::op::TransposeConv::Param; + using internal::tflite::op::TransposeConv::Node; + + auto &operations = model->deref().operations(); + + 
operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } + case ANEURALNETWORKS_LOGICAL_AND_EX: + { + using internal::tflite::op::LogicalAnd::Param; + using internal::tflite::op::LogicalAnd::Node; + + // Add 'operations' + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } + case ANEURALNETWORKS_LOGICAL_OR_EX: + { + using internal::tflite::op::LogicalOr::Param; + using internal::tflite::op::LogicalOr::Node; + + // Add 'operations' + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } case ANEURALNETWORKS_RSQRT_EX: { using internal::tflite::op::RSQRT::Param; @@ -730,6 +834,30 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model, break; } + case ANEURALNETWORKS_SQRT_EX: + { + using internal::tflite::op::SQRT::Param; + using internal::tflite::op::SQRT::Node; + + // Add 'operations' + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } + case ANEURALNETWORKS_EQUAL_EX: + { + using internal::tflite::op::Equal::Param; + using internal::tflite::op::Equal::Node; + + // Add 'operations' + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } case ANEURALNETWORKS_SQUARED_DIFFERENCE_EX: { using internal::tflite::op::SquaredDifference::Param; @@ -778,6 +906,101 @@ int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel *model, break; } + case ANEURALNETWORKS_UNPACK_EX: + { + using internal::tflite::op::Unpack::Param; + using internal::tflite::op::Unpack::Node; + + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } + case ANEURALNETWORKS_NEG_EX: + { + 
using internal::tflite::op::Neg::Param; + using internal::tflite::op::Neg::Node; + + // Add 'operations' + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } + case ANEURALNETWORKS_EXP_EX: + { + using internal::tflite::op::Exp::Param; + using internal::tflite::op::Exp::Node; + + // Add 'operations' + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } + case ANEURALNETWORKS_REDUCE_SUM_EX: + { + using internal::tflite::op::ReduceSum::Param; + using internal::tflite::op::ReduceSum::Node; + + // Add 'operations' + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } + case ANEURALNETWORKS_PACK_EX: + { + using internal::tflite::op::Pack::Param; + using internal::tflite::op::Pack::Node; + + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } + case ANEURALNETWORKS_ABS_EX: + { + using internal::tflite::op::Abs::Param; + using internal::tflite::op::Abs::Node; + + // Add 'operations' + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } + case ANEURALNETWORKS_ARGMAX_EX: + { + using internal::tflite::op::ArgMax::Param; + using internal::tflite::op::ArgMax::Node; + + // Add 'operations' + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, outputs}); + + break; + } + case ANEURALNETWORKS_NOT_EQUAL_EX: + { + using internal::tflite::op::NotEqual::Param; + using internal::tflite::op::NotEqual::Node; + + // Add 'operations' + auto &operations = model->deref().operations(); + + operations.emplace_back<Node>(Param{inputCount, inputs, outputCount, 
outputs}); + + break; + } + default: throw std::runtime_error{"Not supported operation"}; } diff --git a/runtimes/pure_arm_compute/src/model.h b/runtimes/pure_arm_compute/src/model.h index a7e606201..8acc894f4 100644 --- a/runtimes/pure_arm_compute/src/model.h +++ b/runtimes/pure_arm_compute/src/model.h @@ -14,22 +14,52 @@ * limitations under the License. */ +/** + * @file model.h + * @brief This file contains ANeuralNetworksModel class for handling Model NNAPI such as + * ANeuralNetworksModel_create, ANeuralNetworksModel_addOperand + * @ingroup COM_AI_RUNTIME + */ + #ifndef __MODEL_H__ #define __MODEL_H__ #include "internal/Model.h" +/** + * @brief struct to express Model of NNAPI + */ struct ANeuralNetworksModel { public: + /** + * @brief Construct without params + */ ANeuralNetworksModel(); public: + /** + * @brief Get reference of internal::tflite::Model + * @return Reference of internal::tflite::Model + */ internal::tflite::Model &deref(void) { return *_model; } public: + /** + * @brief Release internal::tflite::Model pointer to param + * @param [in] model To get released internal::tflite::Model pointer + * @return N/A + */ void release(std::shared_ptr<const internal::tflite::Model> &model) { model = _model; } + /** + * @brief Get @c true if ANeuralNetworksModel_finish has been called, otherwise @c false + * @return @c true if ANeuralNetworksModel_finish has been called, otherwise @c false + */ bool isFinished() { return _isFinished == true; } + /** + * @brief Mark model process finished + * @return N/A + */ void markAsFinished() { _isFinished = true; } private: |