author    Chunseok Lee <chunseok.lee@samsung.com>  2022-04-15 19:15:11 +0900
committer Chunseok Lee <chunseok.lee@samsung.com>  2022-04-15 19:15:11 +0900
commit    3ad689f0803519e343c36d5700646e86059df961 (patch)
tree      862346c401a5577518fa7f042532aa931b53aa0e /runtime/onert/backend
parent    ac6e4dd7b480e83b586ef533d7b29a8a97eb48fe (diff)
Imported Upstream version 1.20.0 (tags: upstream/1.20.0, submit/tizen/20220415.103159)
Diffstat (limited to 'runtime/onert/backend')
170 files changed, 2266 insertions, 24518 deletions
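The bulk of this import is a deduplication refactor: the hand-written BackendContext classes in acl_cl, acl_neon, and gpu_cl collapse into shared templates in the new cl_common library (plus acl_common::AclBackendContext for the two ACL backends). As orientation before the diff itself, a compressed sketch of that pattern — the type names mirror the real ones, but this is an illustration, not the actual headers:

    // Sketch: one template-parameterized common context replaces per-backend copies.
    #include <iostream>
    #include <memory>

    // Stands in for onert::backend::cl_common::BackendContext.
    template <typename T_TensorBuilder, typename T_KernelGenerator> struct CommonBackendContext
    {
      std::shared_ptr<T_TensorBuilder> tensor_builder;
      std::shared_ptr<T_KernelGenerator> kernel_gen;

      void genKernels() { kernel_gen->generate(); } // shared logic lives here once
    };

    // A backend supplies only its concrete types...
    struct TensorBuilder { /* backend-specific tensor planning */ };
    struct KernelGenerator
    {
      void generate() { std::cout << "generate acl_cl kernels\n"; }
    };

    // ...and its BackendContext becomes an alias instead of a duplicated class:
    using BackendContext = CommonBackendContext<TensorBuilder, KernelGenerator>;

    int main()
    {
      BackendContext ctx{std::make_shared<TensorBuilder>(), std::make_shared<KernelGenerator>()};
      ctx.genKernels();
      return 0;
    }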
diff --git a/runtime/onert/backend/CMakeLists.txt b/runtime/onert/backend/CMakeLists.txt index 4b21e0ace..c43160ba7 100644 --- a/runtime/onert/backend/CMakeLists.txt +++ b/runtime/onert/backend/CMakeLists.txt @@ -1,9 +1,14 @@ +# Backend common libs set(LIB_ONERT_BACKEND_ACL_COMMON onert_backend_acl_common) +set(LIB_ONERT_BACKEND_CL_COMMON onert_backend_cl_common) +add_subdirectory(cl_common) +add_subdirectory(acl_common) +# Backends add_subdirectory(cpu) add_subdirectory(acl_cl) add_subdirectory(acl_neon) -add_subdirectory(acl_common) add_subdirectory(ruy) add_subdirectory(gpu_cl) add_subdirectory(xnnpack) +add_subdirectory(trix) diff --git a/runtime/onert/backend/acl_cl/BackendContext.cc b/runtime/onert/backend/acl_cl/BackendContext.cc deleted file mode 100644 index 5595043ca..000000000 --- a/runtime/onert/backend/acl_cl/BackendContext.cc +++ /dev/null @@ -1,242 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "BackendContext.h" - -#include "TensorBuilder.h" -#include "KernelGenerator.h" -#include "Optimizer.h" -#include "util/logging.h" -#include "ir/Index.h" -#include "ir/OperandIndexMap.h" -#include "ir/OperandIndexSequence.h" - -namespace onert -{ -namespace backend -{ -namespace acl_cl -{ - -void BackendContext::initConsts() -{ - _data.graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) { - constant_initializer->setLayout(graph()->layout()); - op.accept(*constant_initializer); - }); - - _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) { - if (_data.external_operands.contains(ind) || !operand.isConstant()) - return; - const auto &obj = graph()->operands().at(ind); - if (obj.isConstant() && !constant_initializer->exist(ind)) - { - constant_initializer->registerDefaultInitializer(ind, obj); - } - }); - - constant_initializer->run(); -} - -void BackendContext::planTensors() -{ - ir::OperandIndexMap<uint32_t> uses_map; - ir::OperandIndexMap<uint32_t> def_map; - ir::OperandIndexSequence constants; - - // Prepare scanning - _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { - if (_data.external_operands.contains(ind)) - return; - - uses_map[ind] = obj.getUses().size(); - def_map[ind] = obj.getDef().valid() ? 1 : 0; - - if (obj.isConstant()) - constants.append(ind); - - if (!tensor_builder->isRegistered(ind)) - { - // These tensors do not exist in any operation (No use and def) - const auto info = obj.info(); - const auto layout = _data.operand_layouts.at(ind); - // TODO Change tensor info to have permuted shape - tensor_builder->registerTensorInfo(ind, info, layout); - } - }); - - // Start scanning to do notify{First|Last}Use for each tensor - - // If a tensor is a constant, increase the use of the tensor and allocate it first. - // Increasing use count here makes the tensor never be deallocated, i.e it they will be - // deallocated last. 
- VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl; - for (const auto &ind : constants) - { - uses_map[ind]++; - tensor_builder->notifyFirstUse(ind); - } - - // At each operation, - // 1. Scan DEF of outputs. If the DEF, allocate it - // 2. Scan DEF of inputs. If variable tensor, allocate it - // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0 - for (const auto op_ind : _data.op_order) - { - const auto &op = graph()->operations().at(op_ind); - auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; - auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; - - // Define outputs - for (const auto &ind : op_outputs) - { - if (!tensor_builder->isRegistered(ind)) - continue; - assert(def_map.find(ind) != def_map.end()); - if (def_map[ind]) - { - def_map[ind] = 0; - tensor_builder->notifyFirstUse(ind); - } - } - - // Scan variable tensors - // This tensor has features like constant. But OperandInfo and LowerInfo treat them as - // non-constant because of less memory usage by memory planning in here - for (const auto &ind : op_inputs) - { - if (!tensor_builder->isRegistered(ind)) - continue; - const auto &operand = graph()->operands().at(ind); - if (operand.info().isVariable()) - { - // The variable tensor with buffer is not supported yet - assert(operand.data() == nullptr); - assert(operand.getUses().size() == 1 && !operand.getDef().valid()); - assert(uses_map[ind] == 1 && def_map[ind] == 0); - tensor_builder->notifyFirstUse(ind); - } - } - - for (const auto &ind : op_inputs) - { - if (!tensor_builder->isRegistered(ind)) - continue; - assert(uses_map.find(ind) != uses_map.end()); - assert(uses_map[ind] > 0); - uses_map[ind]--; - if (uses_map[ind] == 0) - { - // plan for deallocation of static tensornode - tensor_builder->notifyLastUse(ind); - } - } - } - - _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) { - if (uses_map[ind] == 0) - { - tensor_builder->notifyLastUse(ind); - } - }); - - // Dispose and validate - for (const auto &ind : constants) - { - --uses_map[ind]; - if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice - { - tensor_builder->notifyLastUse(ind); - } - } - - assert( - std::all_of(uses_map.begin(), uses_map.end(), - [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); - - assert( - std::all_of(def_map.begin(), def_map.end(), - [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); -} - -ITensorRegistry *BackendContext::genTensors() -{ - optimizer->optimize(); - - graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { - if (external_operands().contains(ind)) - return; - - const auto frontend_layout = graph()->layout(); - const auto backend_layout = operand_layouts().at(ind); - ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), - obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; - tensor_builder->registerTensorInfo(ind, backend_info, backend_layout); - }); - - // TODO Get compiler options from compiler, and use it rather than getting it from Env - if (util::getConfigString(util::config::EXECUTOR) == "Linear") - { - planTensors(); - } - else - { - // For the executors that does not have fixed linear execution order: - // To make tensors never be deallocated, this is a workaround to use static memory planner - graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) { - if 
(tensor_builder->isRegistered(ind)) - tensor_builder->notifyFirstUse(ind); - }); - } - - tensor_builder->prepare(); - - return tensor_registry.get(); -} - -FunctionMap BackendContext::genKernels() -{ - FunctionMap ret; - - for (auto op_ind : _data.op_order) - { - auto fn_seq = kernel_gen->generate(op_ind); - ret.emplace_back(op_ind, std::move(fn_seq)); - } - - tensor_builder->allocate(); - initConsts(); - - // NOTE For memory optimization, we want to free some operand data - const_cast<ir::Graph &>(*_data.graph) - .operands() - .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); }); - - for (auto &it : ret) - { - auto &fn_seq = it.second; - fn_seq->iterate([&](exec::IFunction &ifunc) { - ifunc.prepare(); - tensor_builder->postFunctionPrepare(); - }); - } - - return ret; -} - -} // namespace acl_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/acl_cl/BackendContext.h b/runtime/onert/backend/acl_cl/BackendContext.h index 2638046ca..5da915825 100644 --- a/runtime/onert/backend/acl_cl/BackendContext.h +++ b/runtime/onert/backend/acl_cl/BackendContext.h @@ -17,10 +17,11 @@ #ifndef __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__ #define __ONERT_BACKEND_ACL_CL_BACKEND_CONTEXT_H__ -#include <backend/BackendContext.h> -#include "TensorBuilder.h" +#include <AclBackendContext.h> + #include "ConstantInitializer.h" #include "KernelGenerator.h" +#include "TensorBuilder.h" namespace onert { @@ -31,33 +32,8 @@ namespace acl_cl class Optimizer; -class BackendContext : public onert::backend::BackendContext -{ -public: - BackendContext(const Backend *backend, ContextData &&data, - std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, - std::shared_ptr<TensorBuilder> tensor_builder = nullptr, - std::shared_ptr<ConstantInitializer> constant_initializer = nullptr, - std::shared_ptr<KernelGenerator> kernel_gen = nullptr) - : onert::backend::BackendContext(backend, std::move(data), tensor_registry), - tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, kernel_gen{ - kernel_gen} - { - } - - ITensorRegistry *genTensors() override; - FunctionMap genKernels() override; - -private: - void initConsts(); - void planTensors(); - -public: - std::shared_ptr<TensorBuilder> tensor_builder; - std::shared_ptr<ConstantInitializer> constant_initializer; - std::shared_ptr<KernelGenerator> kernel_gen; - std::shared_ptr<Optimizer> optimizer; -}; +using BackendContext = + acl_common::AclBackendContext<TensorBuilder, ConstantInitializer, KernelGenerator, Optimizer>; } // namespace acl_cl } // namespace backend diff --git a/runtime/onert/backend/acl_cl/ConstantInitializer.cc b/runtime/onert/backend/acl_cl/ConstantInitializer.cc index 54b2a7a08..0431bb198 100644 --- a/runtime/onert/backend/acl_cl/ConstantInitializer.cc +++ b/runtime/onert/backend/acl_cl/ConstantInitializer.cc @@ -58,21 +58,7 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node) if (block_size_obj.isConstant()) { - _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) { - assert(model_obj.data()); - const auto &shape = model_obj.shape(); - const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base()); - assert(model_obj.shape().rank() == 1); - obj.access([&](ITensor &tensor) { - for (size_t i = 0; i < shape.num_elements(); ++i) - { - const int32_t value = base[shape.num_elements() - i - 1]; - int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() + - tensor.calcOffset({static_cast<int32_t>(i)})); - *into = 
value; - } - }); - }; + _init_map[block_size_index] = acl_common::initReverseOrder<int32_t>; } const auto &paddings_index = node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS); diff --git a/runtime/onert/backend/acl_cl/Optimizer.cc b/runtime/onert/backend/acl_cl/Optimizer.cc index 12e805ee5..a9ce888ee 100644 --- a/runtime/onert/backend/acl_cl/Optimizer.cc +++ b/runtime/onert/backend/acl_cl/Optimizer.cc @@ -16,12 +16,12 @@ #include "Optimizer.h" -#include "ParentInfo.h" +#include <AclSubTensorAnalyzer.h> -#include <cassert> #include <compiler/LoweredGraph.h> #include <util/logging.h> -#include "AclSubTensorAnalyzer.h" + +#include <cassert> namespace onert { diff --git a/runtime/onert/backend/acl_common/AclBackendContext.h b/runtime/onert/backend/acl_common/AclBackendContext.h new file mode 100644 index 000000000..b8d027476 --- /dev/null +++ b/runtime/onert/backend/acl_common/AclBackendContext.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_ACL_COMMON_ACLBACKEND_CONTEXT_H__ +#define __ONERT_BACKEND_ACL_COMMON_ACLBACKEND_CONTEXT_H__ + +#include <backend/BackendContext.h> +#include <ir/Index.h> +#include <ir/OperandIndexMap.h> +#include <ir/OperandIndexSequence.h> +#include <util/logging.h> + +#include <cl_common/BackendContext.h> + +namespace onert +{ +namespace backend +{ +namespace acl_common +{ + +// TODO Find better way to handle common code (reduce template) +template <typename T_TensorBuilder, typename T_ConstantInitializer, typename T_KernelGenerator, + typename T_Optimizer> +class AclBackendContext + : public onert::backend::cl_common::BackendContext<T_TensorBuilder, T_ConstantInitializer, + T_KernelGenerator> +{ +public: + AclBackendContext(const Backend *backend, ContextData &&data, + std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, + std::shared_ptr<T_TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<T_ConstantInitializer> constant_initializer = nullptr, + std::shared_ptr<T_KernelGenerator> kernel_gen = nullptr) + : onert::backend::cl_common::BackendContext<T_TensorBuilder, T_ConstantInitializer, + T_KernelGenerator>( + backend, std::move(data), tensor_registry, tensor_builder, constant_initializer, kernel_gen) + { + // DO NOTHING + } + + ITensorRegistry *genTensors() override + { + optimizer->optimize(); + + this->graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { + if (this->external_operands().contains(ind)) + return; + + const auto frontend_layout = this->graph()->layout(); + const auto backend_layout = this->operand_layouts().at(ind); + ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), + obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; + this->tensor_builder->registerTensorInfo(ind, backend_info, backend_layout); + }); + + // TODO Get compiler options from compiler, and use it rather than getting it from Env + if 
(util::getConfigString(util::config::EXECUTOR) == "Linear") + { + this->planTensors(); + } + else + { + // For the executors that does not have fixed linear execution order: + // To make tensors never be deallocated, this is a workaround to use static memory planner + this->graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) { + if (this->tensor_builder->isRegistered(ind)) + this->tensor_builder->notifyFirstUse(ind); + }); + } + + this->tensor_builder->prepare(); + + return this->tensor_registry.get(); + } + +protected: + void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout backend_layout) override + { + this->tensor_builder->registerTensorInfo(ind, info, backend_layout); + } + +public: + // TODO Make it private + std::shared_ptr<T_Optimizer> optimizer; +}; + +} // namespace acl_common +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_ACL_COMMON_ACLBACKEND_CONTEXT_H__ diff --git a/runtime/onert/backend/acl_common/AclConstantInitializer.h b/runtime/onert/backend/acl_common/AclConstantInitializer.h index b7f66b50e..65659ad50 100644 --- a/runtime/onert/backend/acl_common/AclConstantInitializer.h +++ b/runtime/onert/backend/acl_common/AclConstantInitializer.h @@ -153,6 +153,23 @@ void permuteInit(const onert::ir::Operand &model_obj, onert::backend::ITensor &o Init<T>(model_obj, obj, copy, frontend_layout); } +// Pre-defined initializer - fill reverse order +template <typename T> void initReverseOrder(const ir::Operand &model_obj, backend::ITensor &obj) +{ + assert(model_obj.data()); + const auto &shape = model_obj.shape(); + const auto base = reinterpret_cast<const T *>(model_obj.data()->base()); + assert(model_obj.shape().rank() == 1); + obj.access([&](ITensor &tensor) { + for (size_t i = 0; i < shape.num_elements(); ++i) + { + const T value = base[shape.num_elements() - i - 1]; + T *into = reinterpret_cast<T *>(tensor.buffer() + tensor.calcOffset({static_cast<T>(i)})); + *into = value; + } + }); +} + class AclConstantInitializer : public ir::OperationVisitor { public: diff --git a/runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h b/runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h index 60f4ebf7e..a0bbe7c3c 100644 --- a/runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h +++ b/runtime/onert/backend/acl_common/AclSubTensorAnalyzer.h @@ -17,9 +17,10 @@ #ifndef __ONERT_BACKEND_ACL_COMMON_ACL_SUB_TENSOR_ANALYZER_H__ #define __ONERT_BACKEND_ACL_COMMON_ACL_SUB_TENSOR_ANALYZER_H__ +#include <cl_common/ParentInfo.h> + #include <ir/OperationVisitor.h> #include <ir/Graph.h> -#include "ParentInfo.h" namespace onert { @@ -94,21 +95,21 @@ public: } coordinate_info.set(axis, axis_point); - _parent_map.emplace( - input_index, acl_common::ParentInfo{output_index, _current_op_layout, coordinate_info}); + _parent_map.emplace(input_index, + cl_common::ParentInfo{output_index, _current_op_layout, coordinate_info}); axis_point += input_shape.dim(axis); } } - std::unordered_map<ir::OperandIndex, ParentInfo> &&releaseParentMap() + std::unordered_map<ir::OperandIndex, cl_common::ParentInfo> &&releaseParentMap() { return std::move(_parent_map); } private: const ir::Graph &_graph; - std::unordered_map<ir::OperandIndex, ParentInfo> _parent_map; + std::unordered_map<ir::OperandIndex, cl_common::ParentInfo> _parent_map; ir::Layout _current_op_layout{ir::Layout::UNKNOWN}; bool usePadding{false}; }; diff --git a/runtime/onert/backend/acl_common/AclTensorBuilder.h 
b/runtime/onert/backend/acl_common/AclTensorBuilder.h index 7c1c5dd9a..e008fd6f5 100644 --- a/runtime/onert/backend/acl_common/AclTensorBuilder.h +++ b/runtime/onert/backend/acl_common/AclTensorBuilder.h @@ -17,18 +17,21 @@ #ifndef __ONERT_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__ #define __ONERT_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__ -#include <memory> -#include <queue> - -#include <arm_compute/core/Types.h> -#include "ir/OperandIndexMap.h" -#include <ir/Operands.h> #include "AclTensorManager.h" #include "AclTensorRegistry.h" -#include <memory> -#include "ParentInfo.h" + +#include <cl_common/LifetimeMap.h> +#include <cl_common/ParentInfo.h> + +#include <ir/OperandIndexMap.h> +#include <ir/Operands.h> #include <util/Utils.h> +#include <arm_compute/core/Types.h> + +#include <memory> +#include <queue> + namespace onert { namespace backend @@ -36,16 +39,12 @@ namespace backend namespace acl_common { -enum class UsesType -{ - FIRST, - LAST -}; - template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> class AclTensorBuilder { public: using T_AclTensorManager = AclTensorManager<T_ITensor, T_Tensor, T_SubTensor>; + // TODO Remove this alias and direct usage of this type + using UsesType = cl_common::UsesType; AclTensorBuilder(const ir::Operands &operands, T_AclTensorManager *tensor_mgr); @@ -76,7 +75,7 @@ public: _uses_count_map[index] = num_uses; } - void parent_map(std::unordered_map<ir::OperandIndex, ParentInfo> &&parent_map) + void parent_map(std::unordered_map<ir::OperandIndex, cl_common::ParentInfo> &&parent_map) { _parent_map = std::move(parent_map); } @@ -104,10 +103,10 @@ private: std::unique_ptr<T_AclTensorManager> _tensor_mgr; // for linear executor - std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq; + cl_common::LifetimeSeq _lifetime_seq; // Extra info for concat elimination - ir::OperandIndexMap<ParentInfo> _parent_map; + ir::OperandIndexMap<cl_common::ParentInfo> _parent_map; }; } // namespace acl_common @@ -217,55 +216,7 @@ void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::prepare(void) template <typename T_ITensor, typename T_Tensor, typename T_SubTensor> void AclTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::allocate(void) { - // Update lifetime sequence to apply subtensor optimization - - std::unordered_map<ir::OperandIndex, ir::OperandIndex> root_map; - std::function<ir::OperandIndex &(ir::OperandIndex)> find_root = - [&](ir::OperandIndex ind) -> ir::OperandIndex & { - ir::OperandIndex &ret = root_map[ind]; - - // We know the root parent value already - if (ret.valid()) - return ret; - - auto itr = _parent_map.find(ind); - if (itr == _parent_map.end()) - { - // If there is no parent, let's store the value of itself - return ret = ind; - } - else - { - return ret = find_root(itr->second.parent); - } - }; - - ir::OperandIndexMap<bool> first_use_check; - ir::OperandIndexMap<bool> last_use_check; - std::map<size_t, std::pair<UsesType, ir::OperandIndex>> lifetime_map; - for (size_t i = 0; i < _lifetime_seq.size(); i++) - { - auto &entry = _lifetime_seq[i]; - if (entry.first != UsesType::FIRST) - continue; - auto root_ind = find_root(entry.second); - if (first_use_check[root_ind]) - continue; - first_use_check[root_ind] = true; - lifetime_map[i] = {UsesType::FIRST, root_ind}; - } - - for (int i = _lifetime_seq.size() - 1; i >= 0; i--) - { - auto &entry = _lifetime_seq[i]; - if (entry.first != UsesType::LAST) - continue; - auto root_ind = find_root(entry.second); - if (last_use_check[root_ind]) - continue; - last_use_check[root_ind] = 
true; - lifetime_map[i] = {UsesType::LAST, root_ind}; - } + auto lifetime_map = cl_common::createLifetimeMap(_lifetime_seq, _parent_map); for (auto &entry : lifetime_map) { diff --git a/runtime/onert/backend/acl_common/CMakeLists.txt b/runtime/onert/backend/acl_common/CMakeLists.txt index d3ae5acf7..8d409a47c 100644 --- a/runtime/onert/backend/acl_common/CMakeLists.txt +++ b/runtime/onert/backend/acl_common/CMakeLists.txt @@ -12,6 +12,7 @@ target_include_directories(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC ${CMAKE_CURREN target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC onert_core) target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC arm_compute arm_compute_ex) target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC nnfw_lib_misc) +target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PUBLIC ${LIB_ONERT_BACKEND_CL_COMMON}) target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PRIVATE nnfw_common) target_link_libraries(${LIB_ONERT_BACKEND_ACL_COMMON} PRIVATE nnfw_coverage) diff --git a/runtime/onert/backend/acl_neon/BackendContext.cc b/runtime/onert/backend/acl_neon/BackendContext.cc deleted file mode 100644 index 4de3de02d..000000000 --- a/runtime/onert/backend/acl_neon/BackendContext.cc +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "BackendContext.h" - -#include "TensorBuilder.h" -#include "KernelGenerator.h" -#include "Optimizer.h" -#include "util/logging.h" -#include "ir/Index.h" -#include "ir/OperandIndexMap.h" -#include "ir/OperandIndexSequence.h" - -namespace onert -{ -namespace backend -{ -namespace acl_neon -{ - -void BackendContext::initConsts() -{ - _data.graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) { - constant_initializer->setLayout(graph()->layout()); - op.accept(*constant_initializer); - }); - - _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) { - if (_data.external_operands.contains(ind) || !operand.isConstant()) - return; - const auto &obj = graph()->operands().at(ind); - if (obj.isConstant() && !constant_initializer->exist(ind)) - { - constant_initializer->registerDefaultInitializer(ind, obj); - } - }); - - constant_initializer->run(); -} - -void BackendContext::planTensors() -{ - ir::OperandIndexMap<uint32_t> uses_map; - ir::OperandIndexMap<uint32_t> def_map; - ir::OperandIndexSequence constants; - - // Prepare scanning - _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { - if (_data.external_operands.contains(ind)) - return; - - uses_map[ind] = obj.getUses().size(); - def_map[ind] = obj.getDef().valid() ? 
1 : 0; - - if (obj.isConstant()) - constants.append(ind); - - if (!tensor_builder->isRegistered(ind)) - { - // These tensors do not exist in any operation (No use and def) - const auto info = obj.info(); - const auto layout = _data.operand_layouts.at(ind); - // TODO Change tensor info to have permuted shape - tensor_builder->registerTensorInfo(ind, info, layout); - } - }); - - // Start scanning to do notify{First|Last}Use for each tensor - - // If a tensor is a constant, increase the use of the tensor and allocate it first. - // Increasing use count here makes the tensor never be deallocated, i.e it they will be - // deallocated last. - VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl; - for (const auto &ind : constants) - { - uses_map[ind]++; - tensor_builder->notifyFirstUse(ind); - } - - // At each operation, - // 1. Scan DEF of outputs. If the DEF, allocate it - // 2. Scan DEF of inputs. If variable tensor, allocate it - // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0 - for (const auto op_ind : _data.op_order) - { - auto op_inputs = - graph()->operations().at(op_ind).getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; - auto op_outputs = graph()->operations().at(op_ind).getOutputs() | ir::Remove::DUPLICATED | - ir::Remove::UNDEFINED; - - // Define outputs - for (const auto &ind : op_outputs) - { - if (!tensor_builder->isRegistered(ind)) - continue; - assert(def_map.find(ind) != def_map.end()); - if (def_map[ind]) - { - def_map[ind] = 0; - tensor_builder->notifyFirstUse(ind); - } - } - - // Scan variable tensors - // This tensor has features like constant. But OperandInfo and LowerInfo treat them as - // non-constant because of less memory usage by memory planning in here - for (const auto &ind : op_inputs) - { - if (!tensor_builder->isRegistered(ind)) - continue; - const auto &operand = graph()->operands().at(ind); - if (operand.info().isVariable()) - { - // The variable tensor with buffer is not supported yet - assert(operand.data() == nullptr); - assert(operand.getUses().size() == 1 && !operand.getDef().valid()); - assert(uses_map[ind] == 1 && def_map[ind] == 0); - tensor_builder->notifyFirstUse(ind); - } - } - - for (const auto &ind : op_inputs) - { - if (!tensor_builder->isRegistered(ind)) - continue; - assert(uses_map.find(ind) != uses_map.end()); - assert(uses_map[ind] > 0); - uses_map[ind]--; - if (uses_map[ind] == 0) - { - // plan for deallocation of static tensornode - tensor_builder->notifyLastUse(ind); - } - } - } - - _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) { - if (uses_map[ind] == 0) - { - tensor_builder->notifyLastUse(ind); - } - }); - - // Dispose and validate - for (const auto &ind : constants) - { - --uses_map[ind]; - if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice - { - tensor_builder->notifyLastUse(ind); - } - } - - assert( - std::all_of(uses_map.begin(), uses_map.end(), - [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); - - assert( - std::all_of(def_map.begin(), def_map.end(), - [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); -} - -ITensorRegistry *BackendContext::genTensors() -{ - optimizer->optimize(); - - graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { - if (external_operands().contains(ind)) - return; - - const auto frontend_layout = graph()->layout(); - const auto backend_layout = operand_layouts().at(ind); - ir::OperandInfo 
backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), - obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; - tensor_builder->registerTensorInfo(ind, backend_info, backend_layout); - }); - - // TODO Get compiler options from compiler, and use it rather than getting it from Env - if (util::getConfigString(util::config::EXECUTOR) == "Linear") - { - planTensors(); - } - else - { - // For the executors that does not have fixed linear execution order: - // To make tensors never be deallocated, this is a workaround to use static memory planner - graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) { - if (tensor_builder->isRegistered(ind)) - tensor_builder->notifyFirstUse(ind); - }); - } - - tensor_builder->prepare(); - - return tensor_registry.get(); -} - -FunctionMap BackendContext::genKernels() -{ - FunctionMap ret; - - for (auto op_ind : _data.op_order) - { - auto fn_seq = kernel_gen->generate(op_ind); - ret.emplace_back(op_ind, std::move(fn_seq)); - } - - tensor_builder->allocate(); - initConsts(); - - // NOTE For memory optimization, we want to free some operand data - const_cast<ir::Graph &>(*_data.graph) - .operands() - .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); }); - - for (auto &it : ret) - { - auto &fn_seq = it.second; - fn_seq->iterate([&](exec::IFunction &ifunc) { - ifunc.prepare(); - tensor_builder->postFunctionPrepare(); - }); - } - - return ret; -} - -} // namespace acl_neon -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/acl_neon/BackendContext.h b/runtime/onert/backend/acl_neon/BackendContext.h index 35d777f7b..b73dd188e 100644 --- a/runtime/onert/backend/acl_neon/BackendContext.h +++ b/runtime/onert/backend/acl_neon/BackendContext.h @@ -17,10 +17,11 @@ #ifndef __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__ #define __ONERT_BACKEND_ACL_NEON_BACKEND_CONTEXT_H__ -#include <backend/BackendContext.h> -#include "TensorBuilder.h" +#include <AclBackendContext.h> + #include "ConstantInitializer.h" #include "KernelGenerator.h" +#include "TensorBuilder.h" namespace onert { @@ -31,34 +32,8 @@ namespace acl_neon class Optimizer; -class BackendContext : public onert::backend::BackendContext -{ -public: - BackendContext(const Backend *backend, ContextData &&data, - std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, - std::shared_ptr<TensorBuilder> tensor_builder = nullptr, - std::shared_ptr<ConstantInitializer> constant_initializer = nullptr, - std::shared_ptr<KernelGenerator> kernel_gen = nullptr) - : onert::backend::BackendContext(backend, std::move(data), tensor_registry), - tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, kernel_gen{ - kernel_gen} - { - } - - ITensorRegistry *genTensors() override; - FunctionMap genKernels() override; - -private: - void initConsts(); - void planTensors(); - -public: - // TODO Make it private - std::shared_ptr<TensorBuilder> tensor_builder; - std::shared_ptr<ConstantInitializer> constant_initializer; - std::shared_ptr<KernelGenerator> kernel_gen; - std::shared_ptr<Optimizer> optimizer; -}; +using BackendContext = + acl_common::AclBackendContext<TensorBuilder, ConstantInitializer, KernelGenerator, Optimizer>; } // namespace acl_neon } // namespace backend diff --git a/runtime/onert/backend/acl_neon/ConstantInitializer.cc b/runtime/onert/backend/acl_neon/ConstantInitializer.cc index 35da7c952..1bd702756 100644 --- a/runtime/onert/backend/acl_neon/ConstantInitializer.cc +++ 
b/runtime/onert/backend/acl_neon/ConstantInitializer.cc @@ -37,21 +37,7 @@ void ConstantInitializer::visit(const ir::operation::SpaceToBatchND &node) if (block_size_obj.isConstant()) { - _init_map[block_size_index] = [](const ir::Operand &model_obj, backend::ITensor &obj) { - assert(model_obj.data()); - const auto &shape = model_obj.shape(); - const auto base = reinterpret_cast<const int32_t *>(model_obj.data()->base()); - assert(model_obj.shape().rank() == 1); - obj.access([&](ITensor &tensor) { - for (size_t i = 0; i < shape.num_elements(); ++i) - { - const int32_t value = base[shape.num_elements() - i - 1]; - int32_t *into = reinterpret_cast<int32_t *>(tensor.buffer() + - tensor.calcOffset({static_cast<int32_t>(i)})); - *into = value; - } - }); - }; + _init_map[block_size_index] = acl_common::initReverseOrder<int32_t>; } const auto &paddings_index = node.getInputs().at(ir::operation::SpaceToBatchND::PADDINGS); diff --git a/runtime/onert/backend/acl_neon/Optimizer.cc b/runtime/onert/backend/acl_neon/Optimizer.cc index 781103f9c..283edd174 100644 --- a/runtime/onert/backend/acl_neon/Optimizer.cc +++ b/runtime/onert/backend/acl_neon/Optimizer.cc @@ -16,12 +16,12 @@ #include "Optimizer.h" -#include "ParentInfo.h" +#include <AclSubTensorAnalyzer.h> -#include <cassert> #include <compiler/LoweredGraph.h> #include <util/logging.h> -#include "AclSubTensorAnalyzer.h" + +#include <cassert> namespace onert { diff --git a/runtime/onert/backend/cl_common/CMakeLists.txt b/runtime/onert/backend/cl_common/CMakeLists.txt new file mode 100644 index 000000000..c75129696 --- /dev/null +++ b/runtime/onert/backend/cl_common/CMakeLists.txt @@ -0,0 +1,7 @@ +file(GLOB_RECURSE SOURCES "src/*.cc") + +add_library(${LIB_ONERT_BACKEND_CL_COMMON} STATIC ${SOURCES}) + +target_include_directories(${LIB_ONERT_BACKEND_CL_COMMON} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) +set_target_properties(${LIB_ONERT_BACKEND_CL_COMMON} PROPERTIES POSITION_INDEPENDENT_CODE ON) +target_link_libraries(${LIB_ONERT_BACKEND_CL_COMMON} PUBLIC onert_core) diff --git a/runtime/onert/backend/cl_common/include/cl_common/BackendContext.h b/runtime/onert/backend/cl_common/include/cl_common/BackendContext.h new file mode 100644 index 000000000..7bb72d74e --- /dev/null +++ b/runtime/onert/backend/cl_common/include/cl_common/BackendContext.h @@ -0,0 +1,236 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
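Both ACL ConstantInitializer hunks above replace a per-backend lambda with the shared acl_common::initReverseOrder<T> helper. A minimal standalone sketch of the behavior that helper implements — raw buffers stand in for the real ir::Operand and ITensor interfaces:

    #include <cstddef>
    #include <cstdint>
    #include <iostream>

    // Element i of the destination gets element (n - 1 - i) of the source,
    // i.e. the 1-D constant is copied in reverse order.
    template <typename T> void initReverseOrder(const T *src, T *dst, std::size_t num_elements)
    {
      for (std::size_t i = 0; i < num_elements; ++i)
        dst[i] = src[num_elements - 1 - i];
    }

    int main()
    {
      const int32_t block_size[2] = {2, 3}; // e.g. a SpaceToBatchND BLOCK_SIZE operand
      int32_t reversed[2] = {0, 0};
      initReverseOrder(block_size, reversed, 2);
      std::cout << reversed[0] << " " << reversed[1] << "\n"; // prints "3 2"
      return 0;
    }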
+ */ + +#ifndef __ONERT_BACKEND_CL_COMMON_BACKEND_CONTEXT_H__ +#define __ONERT_BACKEND_CL_COMMON_BACKEND_CONTEXT_H__ + +#include <backend/BackendContext.h> +#include <ir/Index.h> +#include <ir/OperandIndexMap.h> +#include <ir/OperandIndexSequence.h> +#include <util/logging.h> + +namespace onert +{ +namespace backend +{ +namespace cl_common +{ + +// TODO Find better way to handle common code (reduce template) +template <typename T_TensorBuilder, typename T_ConstantInitializer, typename T_KernelGenerator> +class BackendContext : public onert::backend::BackendContext +{ +public: + BackendContext(const Backend *backend, ContextData &&data, + std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, + std::shared_ptr<T_TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<T_ConstantInitializer> constant_initializer = nullptr, + std::shared_ptr<T_KernelGenerator> kernel_gen = nullptr) + : onert::backend::BackendContext(backend, std::move(data), tensor_registry), + tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, kernel_gen{ + kernel_gen} + { + } + + FunctionMap genKernels() override + { + FunctionMap ret; + + // kernel_gen + for (auto op_ind : _data.op_order) + { + auto fn_seq = kernel_gen->generate(op_ind); + ret.emplace_back(op_ind, std::move(fn_seq)); + } + + tensor_builder->allocate(); + initConsts(); + + // NOTE For memory optimization, we want to free some operand data + const_cast<ir::Graph &>(*_data.graph) + .operands() + .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); }); + + for (auto &it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { + ifunc.prepare(); + tensor_builder->postFunctionPrepare(); + }); + } + + return ret; + } + +protected: + void initConsts() + { + _data.graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) { + constant_initializer->setLayout(graph()->layout()); + op.accept(*constant_initializer); + }); + + _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) { + if (_data.external_operands.contains(ind) || !operand.isConstant()) + return; + const auto &obj = graph()->operands().at(ind); + if (obj.isConstant() && !constant_initializer->exist(ind)) + { + constant_initializer->registerDefaultInitializer(ind, obj); + } + }); + + constant_initializer->run(); + } + + virtual void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout backend_layout) = 0; + + void planTensors() + { + ir::OperandIndexMap<uint32_t> uses_map; + ir::OperandIndexMap<uint32_t> def_map; + ir::OperandIndexSequence constants; + + // Prepare scanning + _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { + if (_data.external_operands.contains(ind)) + return; + + uses_map[ind] = obj.getUses().size(); + def_map[ind] = obj.getDef().valid() ? 1 : 0; + + if (obj.isConstant()) + constants.append(ind); + + if (!tensor_builder->isRegistered(ind)) + { + // These tensors do not exist in any operation (No use and def) + const auto info = obj.info(); + const auto layout = _data.operand_layouts.at(ind); + // TODO Change tensor info to have permuted shape + registerTensorInfo(ind, info, layout); + } + }); + + // Start scanning to do notify{First|Last}Use for each tensor + + // If a tensor is a constant, increase the use of the tensor and allocate it first. + // Increasing use count here makes the tensor never be deallocated, i.e it they will be + // deallocated last. 
+ VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl; + for (const auto &ind : constants) + { + uses_map[ind]++; + tensor_builder->notifyFirstUse(ind); + } + + // At each operation, + // 1. Scan DEF of outputs. If the DEF, allocate it + // 2. Scan DEF of inputs. If variable tensor, allocate it + // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0 + for (const auto op_ind : _data.op_order) + { + const auto &op = graph()->operations().at(op_ind); + auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; + auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; + + // Define outputs + for (const auto &ind : op_outputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + assert(def_map.find(ind) != def_map.end()); + if (def_map[ind]) + { + def_map[ind] = 0; + tensor_builder->notifyFirstUse(ind); + } + } + + // Scan variable tensors + // This tensor has features like constant. But OperandInfo and LowerInfo treat them as + // non-constant because of less memory usage by memory planning in here + for (const auto &ind : op_inputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + const auto &operand = graph()->operands().at(ind); + if (operand.info().isVariable()) + { + // The variable tensor with buffer is not supported yet + assert(operand.data() == nullptr); + assert(operand.getUses().size() == 1 && !operand.getDef().valid()); + assert(uses_map[ind] == 1 && def_map[ind] == 0); + tensor_builder->notifyFirstUse(ind); + } + } + + for (const auto &ind : op_inputs) + { + if (!tensor_builder->isRegistered(ind)) + continue; + assert(uses_map.find(ind) != uses_map.end()); + assert(uses_map[ind] > 0); + uses_map[ind]--; + if (uses_map[ind] == 0) + { + // plan for deallocation of static tensornode + tensor_builder->notifyLastUse(ind); + } + } + } + + _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) { + if (uses_map[ind] == 0) + { + tensor_builder->notifyLastUse(ind); + } + }); + + // Dispose and validate + for (const auto &ind : constants) + { + --uses_map[ind]; + if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice + { + tensor_builder->notifyLastUse(ind); + } + } + + assert( + std::all_of(uses_map.begin(), uses_map.end(), + [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); + + assert( + std::all_of(def_map.begin(), def_map.end(), + [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); + } + +public: + // TODO Make it protected + std::shared_ptr<T_TensorBuilder> tensor_builder; + std::shared_ptr<T_ConstantInitializer> constant_initializer; + std::shared_ptr<T_KernelGenerator> kernel_gen; +}; + +} // namespace cl_common +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_CL_COMMON_BACKEND_CONTEXT_H__ diff --git a/runtime/onert/backend/cl_common/include/cl_common/LifetimeMap.h b/runtime/onert/backend/cl_common/include/cl_common/LifetimeMap.h new file mode 100644 index 000000000..5fe5eec79 --- /dev/null +++ b/runtime/onert/backend/cl_common/include/cl_common/LifetimeMap.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_CL_COMMON_LIFETIME_MAP_H__ +#define __ONERT_BACKEND_CL_COMMON_LIFETIME_MAP_H__ + +#include "cl_common/ParentInfo.h" + +#include <ir/OperandIndexMap.h> + +#include <map> +#include <vector> + +namespace onert +{ +namespace backend +{ +namespace cl_common +{ + +// TODO Abstract UserType into LifetimeMap and LifetimeSeq +enum class UsesType +{ + FIRST, + LAST +}; + +// TODO Define class or struct for LifetimeMap and LifetimeSeq +using LifetimeMap = std::map<size_t, std::pair<UsesType, ir::OperandIndex>>; +using LifetimeSeq = std::vector<std::pair<UsesType, ir::OperandIndex>>; + +LifetimeMap createLifetimeMap(LifetimeSeq &seq, ir::OperandIndexMap<ParentInfo> &parent_map); + +} // namespace cl_common +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_CL_COMMON_LIFETIME_MAP_H__ diff --git a/runtime/onert/backend/gpu_cl/ParentInfo.h b/runtime/onert/backend/cl_common/include/cl_common/ParentInfo.h index d7cb2d4fb..510211cb7 100644 --- a/runtime/onert/backend/gpu_cl/ParentInfo.h +++ b/runtime/onert/backend/cl_common/include/cl_common/ParentInfo.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_PARENT_INFO_H__ -#define __ONERT_BACKEND_PARENT_INFO_H__ +#ifndef __ONERT_BACKEND_CL_COMMON_PARENT_INFO_H__ +#define __ONERT_BACKEND_CL_COMMON_PARENT_INFO_H__ #include <ir/Index.h> #include <ir/Coordinates.h> @@ -24,7 +24,7 @@ namespace onert { namespace backend { -namespace gpu_cl +namespace cl_common { /** @@ -37,8 +37,8 @@ struct ParentInfo ir::Coordinates coordinates; }; -} // namespace gpu_cl +} // namespace cl_common } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_ACL_COMMON_PARENT_INFO_H__ +#endif // __ONERT_BACKEND_CL_COMMON_PARENT_INFO_H__ diff --git a/runtime/onert/backend/cl_common/src/LifetimeMap.cc b/runtime/onert/backend/cl_common/src/LifetimeMap.cc new file mode 100644 index 000000000..0b17c58fb --- /dev/null +++ b/runtime/onert/backend/cl_common/src/LifetimeMap.cc @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
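LifetimeMap.h above only declares createLifetimeMap; its definition follows in LifetimeMap.cc below. A self-contained analogue of the de-duplication it performs, with plain ints standing in for ir::OperandIndex and the parent map reduced to child-to-parent ids — a sketch of the algorithm, not the onert API:

    #include <cstddef>
    #include <iostream>
    #include <map>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    enum class UsesType { FIRST, LAST };
    using LifetimeSeq = std::vector<std::pair<UsesType, int>>;
    using LifetimeMap = std::map<std::size_t, std::pair<UsesType, int>>;

    // Resolve a sub-tensor to its root parent (identity when it has no parent).
    int findRoot(int ind, const std::unordered_map<int, int> &parent)
    {
      auto it = parent.find(ind);
      return it == parent.end() ? ind : findRoot(it->second, parent);
    }

    // Keep only the earliest FIRST and the latest LAST event per root tensor,
    // so a sub-tensor's lifetime folds into its parent's.
    LifetimeMap createLifetimeMap(const LifetimeSeq &seq,
                                  const std::unordered_map<int, int> &parent)
    {
      LifetimeMap out;
      std::unordered_map<int, bool> first_seen, last_seen;
      for (std::size_t i = 0; i < seq.size(); ++i)
      {
        if (seq[i].first != UsesType::FIRST)
          continue;
        const int root = findRoot(seq[i].second, parent);
        if (!first_seen[root])
        {
          first_seen[root] = true;
          out[i] = {UsesType::FIRST, root};
        }
      }
      for (std::size_t i = seq.size(); i-- > 0;)
      {
        if (seq[i].first != UsesType::LAST)
          continue;
        const int root = findRoot(seq[i].second, parent);
        if (!last_seen[root])
        {
          last_seen[root] = true;
          out[i] = {UsesType::LAST, root};
        }
      }
      return out;
    }

    int main()
    {
      // Operand 1 is a sub-tensor of operand 0, so its events fold into 0's.
      const std::unordered_map<int, int> parent{{1, 0}};
      const LifetimeSeq seq{
        {UsesType::FIRST, 0}, {UsesType::FIRST, 1}, {UsesType::LAST, 0}, {UsesType::LAST, 1}};
      for (const auto &e : createLifetimeMap(seq, parent))
        std::cout << e.first << ": " << (e.second.first == UsesType::FIRST ? "FIRST" : "LAST")
                  << " of root " << e.second.second << "\n";
      // Prints "0: FIRST of root 0" and "3: LAST of root 0".
      return 0;
    }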
+ */ + +#include "cl_common/LifetimeMap.h" + +#include <unordered_map> + +namespace onert +{ +namespace backend +{ +namespace cl_common +{ + +LifetimeMap createLifetimeMap(LifetimeSeq &lifetime_seq, + ir::OperandIndexMap<ParentInfo> &parent_map) +{ + // Update lifetime sequence to apply subtensor optimization + std::unordered_map<ir::OperandIndex, ir::OperandIndex> root_map; + std::function<ir::OperandIndex &(ir::OperandIndex)> find_root = + [&](ir::OperandIndex ind) -> ir::OperandIndex & { + ir::OperandIndex &ret = root_map[ind]; + + // We know the root parent value already + if (ret.valid()) + return ret; + + auto itr = parent_map.find(ind); + if (itr == parent_map.end()) + { + // If there is no parent, let's store the value of itself + return ret = ind; + } + else + { + return ret = find_root(itr->second.parent); + } + }; + + ir::OperandIndexMap<bool> first_use_check; + ir::OperandIndexMap<bool> last_use_check; + LifetimeMap lifetime_map; + for (size_t i = 0; i < lifetime_seq.size(); i++) + { + auto &entry = lifetime_seq[i]; + if (entry.first != UsesType::FIRST) + continue; + auto root_ind = find_root(entry.second); + if (first_use_check[root_ind]) + continue; + first_use_check[root_ind] = true; + lifetime_map[i] = {UsesType::FIRST, root_ind}; + } + + for (int i = lifetime_seq.size() - 1; i >= 0; i--) + { + auto &entry = lifetime_seq[i]; + if (entry.first != UsesType::LAST) + continue; + auto root_ind = find_root(entry.second); + if (last_use_check[root_ind]) + continue; + last_use_check[root_ind] = true; + lifetime_map[i] = {UsesType::LAST, root_ind}; + } + + return lifetime_map; +} + +} // namespace cl_common +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/cpu/ops/OperationUtils.cc b/runtime/onert/backend/cpu/ops/OperationUtils.cc index 8ac875842..aa4ef352e 100644 --- a/runtime/onert/backend/cpu/ops/OperationUtils.cc +++ b/runtime/onert/backend/cpu/ops/OperationUtils.cc @@ -194,7 +194,7 @@ void CalculateActivationRangeQuantized(ir::Activation activation, const IPortabl } else { - std::cout << "Unsupported fused activation function." 
<< std::endl; + throw std::runtime_error{"Unsupported fused activation function."}; } } diff --git a/runtime/onert/backend/cpu/ops/OperationUtils.h b/runtime/onert/backend/cpu/ops/OperationUtils.h index ac2fbb84f..1fefc3228 100644 --- a/runtime/onert/backend/cpu/ops/OperationUtils.h +++ b/runtime/onert/backend/cpu/ops/OperationUtils.h @@ -18,19 +18,19 @@ #define __NNFW_SUPPORT_NNAPI_OPERATION_UTILS_H__ #include <backend/IPortableTensor.h> - -#include <cker/Shape.h> -#include <cker/Types.h> -#include <iostream> #include <ir/DataType.h> -#include <ir/InternalType.h> #include <ir/Operand.h> #include <ir/Padding.h> +#include <util/CalculateActivationRange.h> + +#include <cker/Shape.h> +#include <cker/Types.h> #include <limits> #include <vector> using OperandType = onert::ir::DataType; +using namespace onert::util; namespace onert { @@ -166,40 +166,6 @@ void GetQuantizedConvolutionMultipliersAndShifts( int num_channels, std::vector<int32_t> &per_channel_output_multiplier, std::vector<int> &per_channel_output_shift); -template <typename T> -void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max) -{ - if (activation == ir::Activation::RELU) - { - *activation_min = 0; - *activation_max = std::numeric_limits<T>::max(); - } - else if (activation == ir::Activation::RELU6) - { - *activation_min = 0; - *activation_max = 6; - } - else if (activation == ir::Activation::RELU1) - { - *activation_min = -1; - *activation_max = 1; - } - else if (activation == ir::Activation::SIGMOID) - { - *activation_min = 0; - *activation_max = 1; - } - else if (activation == ir::Activation::NONE) - { - *activation_min = std::numeric_limits<T>::lowest(); - *activation_max = std::numeric_limits<T>::max(); - } - else - { - std::cout << "Unsupported fused activation function." 
<< std::endl; - } -} - void CalculateActivationRangeQuantized(ir::Activation activation, const IPortableTensor *output, int32_t *act_min, int32_t *act_max); diff --git a/runtime/onert/backend/gpu_cl/Backend.h b/runtime/onert/backend/gpu_cl/Backend.h index dc0b8596c..d67ba1602 100644 --- a/runtime/onert/backend/gpu_cl/Backend.h +++ b/runtime/onert/backend/gpu_cl/Backend.h @@ -22,13 +22,13 @@ #include "BackendContext.h" #include "Config.h" -#include "ClTensorRegistry.h" +#include "TensorRegistry.h" #include "KernelGenerator.h" #include "TensorManager.h" #include "TensorBuilder.h" -#include "open_cl/Environment.h" -#include "open_cl/Status.h" +#include "tensorflow/lite/delegates/gpu/cl/environment.h" +#include "tensorflow/lite/delegates/gpu/common/status.h" namespace onert { @@ -50,22 +50,22 @@ public: const auto &operands = data.graph->operands(); auto context = std::make_unique<gpu_cl::BackendContext>(this, std::move(data)); - auto environment = std::make_shared<Environment>(); + auto environment = std::make_shared<tflite::gpu::cl::Environment>(); if (!CreateEnvironment(environment.get()).ok()) { return nullptr; } auto tm = createTensorManager(&environment->context()); - auto tr = std::make_shared<ClTensorRegistry<TensorManager>>(tm); + auto tr = std::make_shared<TensorRegistry>(tm); - InferenceContext::CreateInferenceInfo create_info; - create_info.precision = CalculationsPrecision::F32; + tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info; + create_info.precision = tflite::gpu::cl::CalculationsPrecision::F32; create_info.storage_type = - GetStorageTypeWithMinimalMemoryConsumption(environment->device().GetInfo()); - create_info.hints.Add(ModelHints::kFastestInference); + tflite::gpu::cl::GetStorageTypeWithMinimalMemoryConsumption(environment->device().GetInfo()); + create_info.hints.Add(tflite::gpu::cl::ModelHints::kFastestInference); - auto cc = std::make_shared<CreationContext>(); + auto cc = std::make_shared<tflite::gpu::cl::CreationContext>(); cc->device = environment->GetDevicePtr(); cc->context = &environment->context(); cc->queue = environment->queue(); diff --git a/runtime/onert/backend/gpu_cl/BackendContext.cc b/runtime/onert/backend/gpu_cl/BackendContext.cc index 6c3ac81a2..ec9442155 100644 --- a/runtime/onert/backend/gpu_cl/BackendContext.cc +++ b/runtime/onert/backend/gpu_cl/BackendContext.cc @@ -33,147 +33,26 @@ namespace backend namespace gpu_cl { -void BackendContext::initConsts() +void BackendContext::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, + ir::Layout backend_layout) { - _data.graph->operations().iterate([&](const ir::OperationIndex &, const ir::Operation &op) { - constant_initializer->setLayout(graph()->layout()); - op.accept(*constant_initializer); - }); - _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) { - if (_data.external_operands.contains(ind) || !operand.isConstant()) - return; - const auto &obj = graph()->operands().at(ind); - if (obj.isConstant() && !constant_initializer->exist(ind)) - { - constant_initializer->registerDefaultInitializer(ind, obj); - } - }); - - constant_initializer->run(); + TensorType type = TensorType::TENSOR_TYPE_VALID; + tensor_builder->registerTensorInfo(ind, info, backend_layout, type); } -void BackendContext::planTensors() +ITensorRegistry *BackendContext::genTensors() { - ir::OperandIndexMap<uint32_t> uses_map; - ir::OperandIndexMap<uint32_t> def_map; - ir::OperandIndexSequence constants; - - // Prepare scanning - 
_data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { - if (_data.external_operands.contains(ind)) - return; - uses_map[ind] = obj.getUses().size(); - def_map[ind] = obj.getDef().valid() ? 1 : 0; - - if (obj.isConstant()) - constants.append(ind); - - if (!tensor_builder->isRegistered(ind)) - { - // These tensors do not exist in any operation (No use and def) - const auto info = obj.info(); - const auto layout = _data.operand_layouts.at(ind); - // TODO Change tensor info to have permuted shape - tensor_builder->registerTensorInfo(ind, info, layout); - } - }); - - // Start scanning to do notify{First|Last}Use for each tensor + ir::OperandIndexMap<TensorType> type_map; - // If a tensor is a constant, increase the use of the tensor and allocate it first. - // Increasing use count here makes the tensor never be deallocated, i.e it they will be - // deallocated last. - VERBOSE(planTensors) << "TENSORS as CONSTANT" << std::endl; - for (const auto &ind : constants) + for (const auto &ind : graph()->getInputs()) { - uses_map[ind]++; - tensor_builder->notifyFirstUse(ind); + type_map[ind] = TensorType::TENSOR_TYPE_INPUT; } - // At each operation, - // 1. Scan DEF of outputs. If the DEF, allocate it - // 2. Scan DEF of inputs. If variable tensor, allocate it - // 3. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0 - for (const auto op_ind : _data.op_order) + for (const auto &ind : graph()->getOutputs()) { - const auto &op = graph()->operations().at(op_ind); - auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; - auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED; - - // Define outputs - for (const auto &ind : op_outputs) - { - if (!tensor_builder->isRegistered(ind)) - continue; - assert(def_map.find(ind) != def_map.end()); - if (def_map[ind]) - { - def_map[ind] = 0; - tensor_builder->notifyFirstUse(ind); - } - } - - // Scan variable tensors - // This tensor has features like constant. 
But OperandInfo and LowerInfo treat them as - // non-constant because of less memory usage by memory planning in here - for (const auto &ind : op_inputs) - { - if (!tensor_builder->isRegistered(ind)) - continue; - const auto &operand = graph()->operands().at(ind); - if (operand.info().isVariable()) - { - // The variable tensor with buffer is not supported yet - assert(operand.data() == nullptr); - assert(operand.getUses().size() == 1 && !operand.getDef().valid()); - assert(uses_map[ind] == 1 && def_map[ind] == 0); - tensor_builder->notifyFirstUse(ind); - } - } - - for (const auto &ind : op_inputs) - { - if (!tensor_builder->isRegistered(ind)) - continue; - assert(uses_map.find(ind) != uses_map.end()); - assert(uses_map[ind] > 0); - uses_map[ind]--; - if (uses_map[ind] == 0) - { - // plan for deallocation of static tensornode - tensor_builder->notifyLastUse(ind); - } - } + type_map[ind] = TensorType::TENSOR_TYPE_OUTPUT; } - - _data.graph->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &) { - if (uses_map[ind] == 0) - { - tensor_builder->notifyLastUse(ind); - } - }); - - // Dispose and validate - for (const auto &ind : constants) - { - --uses_map[ind]; - if (uses_map[ind] == 0) // To prevent notifyLastUse from being called twice - { - tensor_builder->notifyLastUse(ind); - } - } - - assert( - std::all_of(uses_map.begin(), uses_map.end(), - [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); - - assert( - std::all_of(def_map.begin(), def_map.end(), - [](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; })); -} - -ITensorRegistry *BackendContext::genTensors() -{ graph()->operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) { if (external_operands().contains(ind)) return; @@ -182,7 +61,11 @@ ITensorRegistry *BackendContext::genTensors() const auto backend_layout = operand_layouts().at(ind); ir::OperandInfo backend_info{permuteShape(obj.shape(), frontend_layout, backend_layout), obj.typeInfo(), obj.info().memAllocType(), obj.isConstant()}; - tensor_builder->registerTensorInfo(ind, backend_info, backend_layout); + if (obj.isConstant()) + { + type_map[ind] = TensorType::TENSOR_TYPE_INPUT; + } + tensor_builder->registerTensorInfo(ind, backend_info, backend_layout, type_map[ind]); }); // TODO Get compiler options from compiler, and use it rather than getting it from Env @@ -199,44 +82,10 @@ ITensorRegistry *BackendContext::genTensors() tensor_builder->notifyFirstUse(ind); }); } - tensor_builder->prepare(); - return tensor_registry.get(); } -FunctionMap BackendContext::genKernels() -{ - FunctionMap ret; - - // kernel_gen - for (auto op_ind : _data.op_order) - { - auto fn_seq = kernel_gen->generate(op_ind); - ret.emplace_back(op_ind, std::move(fn_seq)); - } - - tensor_builder->allocate(); - - initConsts(); - - // NOTE For memory optimization, we want to free some operand data - const_cast<ir::Graph &>(*_data.graph) - .operands() - .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); }); - - for (auto &it : ret) - { - auto &fn_seq = it.second; - fn_seq->iterate([&](exec::IFunction &ifunc) { - ifunc.prepare(); - tensor_builder->postFunctionPrepare(); - }); - } - - return ret; -} - } // namespace gpu_cl } // namespace backend } // namespace onert diff --git a/runtime/onert/backend/gpu_cl/BackendContext.h b/runtime/onert/backend/gpu_cl/BackendContext.h index f17489e7a..7412d2bce 100644 --- a/runtime/onert/backend/gpu_cl/BackendContext.h +++ 
b/runtime/onert/backend/gpu_cl/BackendContext.h
@@ -20,10 +20,12 @@
 #include <backend/BackendContext.h>
 #include <util/ConfigSource.h>
+#include <cl_common/BackendContext.h>
+
 #include "ConstantInitializer.h"
 #include "KernelGenerator.h"
 #include "TensorBuilder.h"
-#include "open_cl/InferenceContext.h"
+#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
 namespace onert
 {
@@ -32,31 +34,28 @@ namespace backend
 namespace gpu_cl
 {
-class BackendContext : public onert::backend::BackendContext
+class BackendContext
+  : public onert::backend::cl_common::BackendContext<TensorBuilder, ConstantInitializer,
+                                                     KernelGenerator>
 {
 public:
   BackendContext(const Backend *backend, ContextData &&data,
-                 std::shared_ptr<ITensorRegistry> tensor_registry = nullptr,
+                 std::shared_ptr<TensorRegistry> tensor_registry = nullptr,
                  std::shared_ptr<TensorBuilder> tensor_builder = nullptr,
                  std::shared_ptr<ConstantInitializer> constant_initializer = nullptr,
                  std::shared_ptr<KernelGenerator> kernel_gen = nullptr)
-    : onert::backend::BackendContext(backend, std::move(data), tensor_registry),
-      tensor_builder{tensor_builder}, constant_initializer{constant_initializer}, kernel_gen{
-                                                                                    kernel_gen}
+    : onert::backend::cl_common::BackendContext<TensorBuilder, ConstantInitializer,
+                                                KernelGenerator>(
+        backend, std::move(data), tensor_registry, tensor_builder, constant_initializer, kernel_gen)
   {
+    // DO NOTHING
   }
   ITensorRegistry *genTensors() override;
-  FunctionMap genKernels() override;
-
-private:
-  void initConsts();
-  void planTensors();
-public:
-  std::shared_ptr<TensorBuilder> tensor_builder;
-  std::shared_ptr<ConstantInitializer> constant_initializer;
-  std::shared_ptr<KernelGenerator> kernel_gen;
+protected:
+  void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+                          ir::Layout backend_layout) override;
 };
 } // namespace gpu_cl
diff --git a/runtime/onert/backend/gpu_cl/CMakeLists.txt b/runtime/onert/backend/gpu_cl/CMakeLists.txt
index 49bae37f8..eb1964214 100644
--- a/runtime/onert/backend/gpu_cl/CMakeLists.txt
+++ b/runtime/onert/backend/gpu_cl/CMakeLists.txt
@@ -1,14 +1,14 @@
 set(LIB_ONERT_BACKEND_GPU_CL onert_backend_gpu_cl)
+if(NOT BUILD_GPU_CL)
+  return()
+endif(NOT BUILD_GPU_CL)
+
 nnas_find_package(Opencl_Headers QUIET)
 if(NOT Opencl_Headers_FOUND)
   return()
 endif(NOT Opencl_Headers_FOUND)
-if(NOT BUILD_GPU_CL)
-  return()
-endif(NOT BUILD_GPU_CL)
-
 nnas_find_package(Farmhash QUIET)
 if(NOT Farmhash_FOUND)
   return()
@@ -19,18 +19,32 @@ if(NOT Abseil_FOUND)
   return()
 endif(NOT Abseil_FOUND)
-file(GLOB_RECURSE SOURCES "*.cc")
+nnfw_find_package(Fp16 QUIET)
+if(NOT Fp16_FOUND)
+  return()
+endif(NOT Fp16_FOUND)
+nnas_find_package(TensorFlowGpu QUIET)
+if(NOT TensorFlowGpu_FOUND)
+  message(FATAL_ERROR "TensorFlowGpu lib not found")
+  return()
+endif(NOT TensorFlowGpu_FOUND)
+
+file(GLOB_RECURSE SOURCES "*.cc")
 add_library(${LIB_ONERT_BACKEND_GPU_CL} SHARED ${SOURCES})
 target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
+target_include_directories(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${TENSORFLOWGPU_SOURCE_DIR})
 target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE abseil)
 target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE dl)
 target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE farmhash)
-target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE Headers)
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} INTERFACE Open_CL_Headers)
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE fp16)
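+# TensorFlowGpu (linked next) is the TFLite GPU delegate library located above;
+# the gpu_cl sources now include its headers under tensorflow/lite/delegates/gpu/.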
+target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE TensorFlowGpu) target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE onert_core) +target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE ${LIB_ONERT_BACKEND_CL_COMMON}) target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE nnfw_common) target_link_libraries(${LIB_ONERT_BACKEND_GPU_CL} PRIVATE nnfw_coverage) diff --git a/runtime/onert/backend/gpu_cl/ClConstantInitializer.cc b/runtime/onert/backend/gpu_cl/ClConstantInitializer.cc index b3ef2f560..05dd8e2a3 100644 --- a/runtime/onert/backend/gpu_cl/ClConstantInitializer.cc +++ b/runtime/onert/backend/gpu_cl/ClConstantInitializer.cc @@ -93,6 +93,9 @@ void ClConstantInitializer::registerPermuteInitializer(const ir::OperandIndex &i case DataType::FLOAT32: _init_map[index] = std::bind(permuteInit<float>, _1, _2, _current_layout); break; + case DataType::INT32: + _init_map[index] = std::bind(permuteInit<int32_t>, _1, _2, _current_layout); + break; default: throw std::runtime_error("Not supported, yet"); break; diff --git a/runtime/onert/backend/gpu_cl/ClConstantInitializer.h b/runtime/onert/backend/gpu_cl/ClConstantInitializer.h index d7d21e847..95e228acd 100644 --- a/runtime/onert/backend/gpu_cl/ClConstantInitializer.h +++ b/runtime/onert/backend/gpu_cl/ClConstantInitializer.h @@ -17,8 +17,6 @@ #ifndef __ONERT_COMPILER_GPU_CL_CLCONSTANT_INITIALIZER_H__ #define __ONERT_COMPILER_GPU_CL_CLCONSTANT_INITIALIZER_H__ -#include "ClTensorRegistry.h" - #include <unordered_map> #include <functional> diff --git a/runtime/onert/backend/gpu_cl/ClFunction.h b/runtime/onert/backend/gpu_cl/ClFunction.h index 9d3d69092..5e8a11a84 100644 --- a/runtime/onert/backend/gpu_cl/ClFunction.h +++ b/runtime/onert/backend/gpu_cl/ClFunction.h @@ -22,9 +22,9 @@ #include <vector> #include <memory> -#include "open_cl/kernels/GpuOperation.h" -#include "open_cl/ClCommandQueue.h" -#include "open_cl/Status.h" +#include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h" +#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h" +#include "tensorflow/lite/delegates/gpu/common/status.h" namespace onert { @@ -32,19 +32,18 @@ namespace backend { namespace gpu_cl { - class ClFunction : public ::onert::exec::IFunction { public: ClFunction() : _gpu_operations(), _creation_context() {} public: - void configure(std::shared_ptr<CreationContext> creation_context) + void configure(std::shared_ptr<tflite::gpu::cl::CreationContext> creation_context) { _creation_context = creation_context; } - void add_operation(std::unique_ptr<GPUOperation> gpu_operation) + void add_operation(std::unique_ptr<tflite::gpu::cl::GPUOperation> gpu_operation) { _gpu_operations.push_back(std::move(gpu_operation)); } @@ -57,6 +56,10 @@ public: { throw std::runtime_error("Failed to AddToQueue."); } + if (!_creation_context->queue->WaitForCompletion().ok()) + { + throw std::runtime_error("Failed to WaitForCompletion."); + } } } @@ -77,8 +80,8 @@ public: } private: - std::vector<std::unique_ptr<GPUOperation>> _gpu_operations; - std::shared_ptr<CreationContext> _creation_context; + std::vector<std::unique_ptr<tflite::gpu::cl::GPUOperation>> _gpu_operations; + std::shared_ptr<tflite::gpu::cl::CreationContext> _creation_context; }; } // namespace gpu_cl diff --git a/runtime/onert/backend/gpu_cl/ClMemoryManager.h b/runtime/onert/backend/gpu_cl/ClMemoryManager.h deleted file mode 100644 index 3bac0d51d..000000000 --- a/runtime/onert/backend/gpu_cl/ClMemoryManager.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., 
Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_ACL_COMMON_MEMORY_MANAGER_H__ -#define __ONERT_BACKEND_ACL_COMMON_MEMORY_MANAGER_H__ - -#include <cassert> - -#include "ir/OperandIndexMap.h" -#include "ir/Shape.h" -#include "open_cl/ClContext.h" -#include "open_cl/InferenceContext.h" -#include "open_cl/Status.h" -#include "open_cl/StorageTypeUtil.h" -#include "open_cl/TensorType.h" -#include "util/logging.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -template <typename T_ITensor, typename T_Tensor> class ClMemoryManager -{ -public: - ClMemoryManager(CLContext *context) : _context{context} {} - - virtual ~ClMemoryManager() = default; - - virtual void allocate(void) - { - for (const auto &tensor_entry : _tensors) - { - auto tensor = tensor_entry.second; - const auto &t = tensor_reserver_.Get(tensor_entry.first.value()); - const auto &shape = t->shape; - const auto &descriptor = t->descriptor; - if (!CreateTensor(*_context, shape, descriptor, tensor->handle()).ok()) - { - return; - } - } - } - - virtual void deallocate(void) - { - // NYI - } - - virtual void startLifetime(const ir::OperandIndex &) - { /* DO NOTHING */ - } - virtual void finishLifetime(const ir::OperandIndex &) - { /* DO NOTHING */ - } - - void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info, - InferenceContext::CreateInferenceInfo create_info, - std::shared_ptr<Environment> environment, DeviceInfo &device_info) - { - ValueId max_id = 0; - auto data_type = DeduceDataTypeFromPrecision(create_info.precision); - const auto shape = info.shape(); - - auto tensor = std::make_shared<T_Tensor>(shape.rank(), shape, environment); - _tensors[ind] = tensor; - - BHWC t_shape; - switch (shape.rank()) - { - case 1: - // B layout - t_shape = BHWC(shape.dim(0), 1, 1, 1); - break; - case 2: - // BC layout - t_shape = BHWC(shape.dim(0), 1, 1, shape.dim(1)); - break; - case 3: - // BWC layout - t_shape = BHWC(shape.dim(0), 1, shape.dim(1), shape.dim(2)); - break; - case 4: - // BHWC layout - t_shape = BHWC(shape.dim(0), shape.dim(1), shape.dim(2), shape.dim(3)); - break; - default: - break; - } - - TensorStorageType storage_type = create_info.storage_type; - Layout layout = t_shape.b == 1 ? 
Layout::HWC : Layout::BHWC; - - ValueId id = ind.value(); - storage_type = SelectBestStorageType(device_info, t_shape, storage_type, data_type, layout); - auto dummy = std::make_shared<InferenceContext::DummyTensor>(); - dummy->shape = t_shape; - dummy->descriptor = TensorDescriptor{data_type, storage_type, layout}; - tensor_reserver_.Add(id, dummy); - - max_id = std::max(max_id, id); - - tensor_reserver_.SetNext(max_id + 1); - } - - ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &tensors(void) { return _tensors; } - - InferenceContext::TensorReserver &tensorReservers(void) { return tensor_reserver_; } - -private: - ir::OperandIndexMap<std::shared_ptr<T_Tensor>> _tensors; - InferenceContext::TensorReserver tensor_reserver_; - CLContext *_context; -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_ACL_COMMON_MEMORY_MANAGER_H__ diff --git a/runtime/onert/backend/gpu_cl/ClTensorBuilder.h b/runtime/onert/backend/gpu_cl/ClTensorBuilder.h deleted file mode 100644 index 951bbd844..000000000 --- a/runtime/onert/backend/gpu_cl/ClTensorBuilder.h +++ /dev/null @@ -1,289 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_CL_TENSOR_BUILDER_H__ -#define __ONERT_BACKEND_CL_TENSOR_BUILDER_H__ - -#include <memory> -#include <queue> - -#include "ClTensorManager.h" -#include "ClTensorRegistry.h" -#include "ParentInfo.h" - -#include "open_cl/TensorType.h" -#include "open_cl/TensorTypeUtil.h" -#include "open_cl/ClDevice.h" -#include "open_cl/InferenceContext.h" - -#include "ir/OperandIndexMap.h" -#include "ir/OperandIndexSequence.h" -#include <ir/Operands.h> -#include <util/Utils.h> - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -enum class UsesType -{ - FIRST, - LAST -}; - -template <typename T_ITensor, typename T_Tensor> class ClTensorBuilder -{ -public: - using T_ClTensorManager = ClTensorManager<T_ITensor, T_Tensor>; - - ClTensorBuilder(const ir::Operands &operands, T_ClTensorManager *tensor_mgr, - InferenceContext::CreateInferenceInfo create_info, - const std::shared_ptr<Environment> &environment); - - /** - * @brief Register tensor information to allocate on ACL-CL backend - * @param[in] ind Operand index - * @param[in] info Tensor information - * @param[in] layout Tensor data layout - */ - void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info, - ir::Layout backend_layout); - - void notifyFirstUse(const ir::OperandIndex &); - void notifyLastUse(const ir::OperandIndex &); - - bool isRegistered(const ir::OperandIndex &) const; - - void prepare(); - void allocate(); - void postFunctionPrepare(); - - T_ClTensorManager *cl_tensor_manager(void) { return _tensor_mgr.get(); } - - void setUsesCount(const ir::OperandIndex &index, size_t num_uses) - { - assert(_uses_count_map.find(index) != _uses_count_map.end() ? 
_uses_count_map[index] == num_uses - : true); - _uses_count_map[index] = num_uses; - } - - void parent_map(std::unordered_map<ir::OperandIndex, ParentInfo> &&parent_map) - { - _parent_map = std::move(parent_map); - } - - bool areSubTensorsOf(const ir::OperandIndex &parent, const ir::OperandIndexSequence &seq); - - /** - * @brief Check child tensor is allocated as subtensor of parent tensor - * @param[in] parent Index of parent - * @param[in] child Index of child - * @return @c true if child is allocated as subtensor of parent, otherwise @c false - */ - bool isSubTensorOf(const ir::OperandIndex &parent, const ir::OperandIndex &child); - -private: - void buildTensors(void); - ir::OperandIndex findRootParent(ir::OperandIndex index); - -private: - const ir::Operands &_operands; - ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map; - ir::OperandIndexMap<ir::Layout> _tensor_layout_map; - ir::OperandIndexMap<size_t> _uses_count_map; - - std::unique_ptr<T_ClTensorManager> _tensor_mgr; - InferenceContext::CreateInferenceInfo _create_info; - std::shared_ptr<Environment> _environment; - - // for linear executor - std::vector<std::pair<UsesType, ir::OperandIndex>> _lifetime_seq; - - // Extra info for concat elimination - ir::OperandIndexMap<ParentInfo> _parent_map; -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#include <cassert> -#include <stack> - -#include "util/logging.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -template <typename T_ITensor, typename T_Tensor> -ClTensorBuilder<T_ITensor, T_Tensor>::ClTensorBuilder( - const ir::Operands &operands, T_ClTensorManager *tensor_mgr, - InferenceContext::CreateInferenceInfo create_info, - const std::shared_ptr<Environment> &environment) - : _operands{operands}, _tensor_mgr{tensor_mgr}, _create_info{create_info}, _environment{ - environment} -{ - assert(_tensor_mgr); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorBuilder<T_ITensor, T_Tensor>::registerTensorInfo(const ir::OperandIndex &ind, - const ir::OperandInfo &info, - ir::Layout backend_layout) -{ - assert(_tensor_mgr->constTensors().size() == 0); - assert(_tensor_mgr->nonconstTensors().size() == 0); - - _uses_count_map[ind] = _operands.at(ind).getUses().size(); - - _tensor_info_map.emplace(ind, info); - _tensor_layout_map.insert({ind, backend_layout}); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorBuilder<T_ITensor, T_Tensor>::notifyFirstUse(const ir::OperandIndex &ind) -{ - _lifetime_seq.emplace_back(UsesType::FIRST, ind); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorBuilder<T_ITensor, T_Tensor>::notifyLastUse(const ir::OperandIndex &ind) -{ - _lifetime_seq.emplace_back(UsesType::LAST, ind); -} - -template <typename T_ITensor, typename T_Tensor> -bool ClTensorBuilder<T_ITensor, T_Tensor>::isRegistered(const ir::OperandIndex &ind) const -{ - return _tensor_info_map.find(ind) != _tensor_info_map.end(); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorBuilder<T_ITensor, T_Tensor>::prepare(void) -{ - buildTensors(); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorBuilder<T_ITensor, T_Tensor>::allocate(void) -{ - // Update lifetime sequence to apply subtensor optimization - - std::unordered_map<ir::OperandIndex, ir::OperandIndex> root_map; - std::function<ir::OperandIndex &(ir::OperandIndex)> find_root = - [&](ir::OperandIndex ind) -> ir::OperandIndex & { - ir::OperandIndex &ret = root_map[ind]; - - // We know the root parent value 
already - if (ret.valid()) - return ret; - - auto itr = _parent_map.find(ind); - if (itr == _parent_map.end()) - { - // If there is no parent, let's store the value of itself - return ret = ind; - } - else - { - return ret = find_root(itr->second.parent); - } - }; - - ir::OperandIndexMap<bool> first_use_check; - ir::OperandIndexMap<bool> last_use_check; - std::map<size_t, std::pair<UsesType, ir::OperandIndex>> lifetime_map; - for (size_t i = 0; i < _lifetime_seq.size(); i++) - { - auto &entry = _lifetime_seq[i]; - if (entry.first != UsesType::FIRST) - continue; - auto root_ind = find_root(entry.second); - if (first_use_check[root_ind]) - continue; - first_use_check[root_ind] = true; - lifetime_map[i] = {UsesType::FIRST, root_ind}; - } - - for (int i = _lifetime_seq.size() - 1; i >= 0; i--) - { - auto &entry = _lifetime_seq[i]; - if (entry.first != UsesType::LAST) - continue; - auto root_ind = find_root(entry.second); - if (last_use_check[root_ind]) - continue; - last_use_check[root_ind] = true; - lifetime_map[i] = {UsesType::LAST, root_ind}; - } - - for (auto &entry : lifetime_map) - { - auto &use = entry.second; - auto use_type = use.first; - auto use_index = use.second; - assert(use_index.valid()); - if (use_type == UsesType::FIRST) - _tensor_mgr->startLifetime(use_index); - else - _tensor_mgr->finishLifetime(use_index); - } - - _tensor_mgr->allocateConsts(); - - // TODO Since `_parent_map` is filled for all Concat nodes even if the node this backend uses - // After refactoring BackendContext we can uncomment this - // assert(_tensor_info_map.size() == - // _tensor_mgr->nonconstTensors().size() + num of constants of _tensor_info_map + - // _parent_map.size()); - _tensor_mgr->allocateNonconsts(); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorBuilder<T_ITensor, T_Tensor>::postFunctionPrepare(void) -{ - _tensor_mgr->tryDeallocConstants(); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorBuilder<T_ITensor, T_Tensor>::buildTensors(void) -{ - assert(_tensor_mgr->constTensors().size() == 0); - assert(_tensor_mgr->nonconstTensors().size() == 0); - // Normal tensors - for (auto &entry : _tensor_info_map) - { - auto ind = entry.first; - if (_parent_map.count(ind) > 0) - continue; - - const auto &info = entry.second; - _tensor_mgr->buildTensor(ind, info, _create_info, _environment, _environment->device().info_); - } -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_ACL_COMMON_TEMPL_TENSOR_BUILDER_H__ diff --git a/runtime/onert/backend/gpu_cl/ClTensorManager.h b/runtime/onert/backend/gpu_cl/ClTensorManager.h deleted file mode 100644 index 49a11730f..000000000 --- a/runtime/onert/backend/gpu_cl/ClTensorManager.h +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_ACL_COMMON_TENSOR_MANAGER_H__ -#define __ONERT_BACKEND_ACL_COMMON_TENSOR_MANAGER_H__ - -#include "ClMemoryManager.h" - -#include "open_cl/InferenceContext.h" -#include "open_cl/TensorType.h" - -#include "ir/OperandInfo.h" -#include "ir/OperandIndexMap.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -template <typename T_ITensor, typename T_Tensor> class ClTensorManager -{ -public: - using T_ClMemoryManager = ClMemoryManager<T_ITensor, T_Tensor>; - - ClTensorManager(T_ClMemoryManager *const_mgr, T_ClMemoryManager *nonconst_mgr); - - virtual ~ClTensorManager() = default; - - void allocateConsts(void); - void allocateNonconsts(void); - void deallocateConsts(void); - void deallocateNonconsts(void); - - void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info, - InferenceContext::CreateInferenceInfo create_info, - std::shared_ptr<Environment> environment, DeviceInfo &device_info); - - std::shared_ptr<T_ITensor> findTensorAsParent(const ir::OperandIndex &ind); - - void startLifetime(const ir::OperandIndex &ind); - void finishLifetime(const ir::OperandIndex &ind); - - std::shared_ptr<T_ITensor> at(const ir::OperandIndex &ind); - std::shared_ptr<InferenceContext::DummyTensor> atR(const ir::OperandIndex &ind); - - InferenceContext::TensorReserver &constTensorReservers(void); - InferenceContext::TensorReserver &nonconstTensorReservers(void); - - ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &constTensors(void); - ir::OperandIndexMap<std::shared_ptr<T_Tensor>> &nonconstTensors(void); - - void iterate(const std::function<void(const ir::OperandIndex &)> &fn); - - void tryDeallocConstants(void); - -private: - std::unique_ptr<T_ClMemoryManager> _const_mgr; - std::unique_ptr<T_ClMemoryManager> _nonconst_mgr; - ir::OperandIndexMap<T_ClMemoryManager &> _ind_to_mgr; -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#include <cassert> -#include "util/logging.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -template <typename T_ITensor, typename T_Tensor> -ClTensorManager<T_ITensor, T_Tensor>::ClTensorManager(T_ClMemoryManager *const_mgr, - T_ClMemoryManager *nonconst_mgr) - : _const_mgr{const_mgr}, _nonconst_mgr{nonconst_mgr} -{ - // DO NOTHING -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorManager<T_ITensor, T_Tensor>::allocateConsts(void) -{ - _const_mgr->allocate(); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorManager<T_ITensor, T_Tensor>::allocateNonconsts(void) -{ - _nonconst_mgr->allocate(); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorManager<T_ITensor, T_Tensor>::deallocateConsts(void) -{ - _const_mgr->deallocate(); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorManager<T_ITensor, T_Tensor>::deallocateNonconsts(void) -{ - _nonconst_mgr->deallocate(); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorManager<T_ITensor, T_Tensor>::buildTensor( - const ir::OperandIndex &ind, const ir::OperandInfo &info, - InferenceContext::CreateInferenceInfo create_info, std::shared_ptr<Environment> environment, - DeviceInfo &device_info) -{ - assert(_ind_to_mgr.find(ind) == _ind_to_mgr.end()); - - if (info.isConstant()) - { - _const_mgr->buildTensor(ind, info, create_info, environment, device_info); - _ind_to_mgr.insert({ind, *_const_mgr}); - } - else - { - _nonconst_mgr->buildTensor(ind, info, create_info, environment, device_info); - _ind_to_mgr.insert({ind, *_nonconst_mgr}); - } -} - 
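ClTensorManager::buildTensor above routes each operand exactly once to either the constant or the non-constant memory manager and records the choice in _ind_to_mgr; the replacement TensorManager introduced later in this patch keeps the same dispatch. A minimal self-contained sketch of the idea (Pool, ManagerSketch, and plain int indices are illustrative stand-ins, not names from this codebase):

  #include <cassert>
  #include <memory>
  #include <unordered_map>

  struct Pool // stand-in for the const/nonconst memory manager
  {
    void buildTensor(int ind) { (void)ind; /* reserve bookkeeping for this operand */ }
  };

  class ManagerSketch
  {
  public:
    ManagerSketch(std::unique_ptr<Pool> const_mgr, std::unique_ptr<Pool> nonconst_mgr)
      : _const_mgr{std::move(const_mgr)}, _nonconst_mgr{std::move(nonconst_mgr)}
    {
    }

    void buildTensor(int ind, bool is_constant)
    {
      assert(_ind_to_mgr.find(ind) == _ind_to_mgr.end()); // each operand is routed exactly once
      Pool *mgr = is_constant ? _const_mgr.get() : _nonconst_mgr.get();
      mgr->buildTensor(ind);
      _ind_to_mgr.emplace(ind, mgr); // later lifetime/lookup calls reuse this routing
    }

  private:
    std::unique_ptr<Pool> _const_mgr;
    std::unique_ptr<Pool> _nonconst_mgr;
    std::unordered_map<int, Pool *> _ind_to_mgr;
  };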
-template <typename T_ITensor, typename T_Tensor> -void ClTensorManager<T_ITensor, T_Tensor>::startLifetime(const ir::OperandIndex &ind) -{ - assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end()); - _ind_to_mgr.at(ind).startLifetime(ind); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorManager<T_ITensor, T_Tensor>::finishLifetime(const ir::OperandIndex &ind) -{ - assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end()); - _ind_to_mgr.at(ind).finishLifetime(ind); -} - -template <typename T_ITensor, typename T_Tensor> -std::shared_ptr<T_ITensor> ClTensorManager<T_ITensor, T_Tensor>::at(const ir::OperandIndex &ind) -{ - if (_ind_to_mgr.find(ind) == _ind_to_mgr.end()) - return nullptr; - - auto &tensors = _ind_to_mgr.at(ind).tensors(); - if (tensors.find(ind) != tensors.end()) - { - return tensors.at(ind); - } - - return nullptr; -} - -template <typename T_ITensor, typename T_Tensor> -ir::OperandIndexMap<std::shared_ptr<T_Tensor>> & -ClTensorManager<T_ITensor, T_Tensor>::constTensors(void) -{ - return _const_mgr->tensors(); -} - -template <typename T_ITensor, typename T_Tensor> -ir::OperandIndexMap<std::shared_ptr<T_Tensor>> & -ClTensorManager<T_ITensor, T_Tensor>::nonconstTensors(void) -{ - return _nonconst_mgr->tensors(); -} - -template <typename T_ITensor, typename T_Tensor> -std::shared_ptr<InferenceContext::DummyTensor> -ClTensorManager<T_ITensor, T_Tensor>::atR(const ir::OperandIndex &ind) -{ - if (_nonconst_mgr->tensorReservers().HaveTensor(ind.value())) - { - return _nonconst_mgr->tensorReservers().Get(ind.value()); - } - else if (_const_mgr->tensorReservers().HaveTensor(ind.value())) - { - return _const_mgr->tensorReservers().Get(ind.value()); - } - return nullptr; -} - -template <typename T_ITensor, typename T_Tensor> -InferenceContext::TensorReserver &ClTensorManager<T_ITensor, T_Tensor>::constTensorReservers(void) -{ - return _const_mgr->tensorReservers(); -} - -template <typename T_ITensor, typename T_Tensor> -InferenceContext::TensorReserver & -ClTensorManager<T_ITensor, T_Tensor>::nonconstTensorReservers(void) -{ - return _nonconst_mgr->tensorReservers(); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorManager<T_ITensor, T_Tensor>::iterate( - const std::function<void(const ir::OperandIndex &)> &fn) -{ - for (auto it : _nonconst_mgr->tensors()) - fn(it.first); - - for (auto it : _const_mgr->tensors()) - fn(it.first); -} - -template <typename T_ITensor, typename T_Tensor> -void ClTensorManager<T_ITensor, T_Tensor>::tryDeallocConstants(void) -{ - // NYI -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_ACL_COMMON_TENSOR_MANAGER_H__ diff --git a/runtime/onert/backend/gpu_cl/Config.cc b/runtime/onert/backend/gpu_cl/Config.cc index 067a2070f..9959a471b 100644 --- a/runtime/onert/backend/gpu_cl/Config.cc +++ b/runtime/onert/backend/gpu_cl/Config.cc @@ -17,8 +17,11 @@ #include "Config.h" #include <dlfcn.h> -#include "open_cl/OpenclWrapper.h" -#include "open_cl/Status.h" + +#include "tensorflow/lite/delegates/gpu/common/status.h" +#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h" + +using namespace tflite::gpu::cl; namespace onert { @@ -26,12 +29,9 @@ namespace backend { namespace gpu_cl { - -Config::~Config() { UnloadOpenCL(_handle); } - bool Config::initialize() { - if (LoadOpenCL(&_handle).ok()) + if (LoadOpenCL().ok()) { return true; } diff --git a/runtime/onert/backend/gpu_cl/Config.h b/runtime/onert/backend/gpu_cl/Config.h index aa5a51a15..6a455bbb5 100644 --- 
a/runtime/onert/backend/gpu_cl/Config.h +++ b/runtime/onert/backend/gpu_cl/Config.h @@ -31,7 +31,7 @@ namespace gpu_cl class Config : public IConfig { public: - virtual ~Config(); + virtual ~Config() {} public: std::string id() override { return "gpu_cl"; } diff --git a/runtime/onert/backend/gpu_cl/KernelGenerator.cc b/runtime/onert/backend/gpu_cl/KernelGenerator.cc index a84867f8c..04edc3928 100644 --- a/runtime/onert/backend/gpu_cl/KernelGenerator.cc +++ b/runtime/onert/backend/gpu_cl/KernelGenerator.cc @@ -19,13 +19,14 @@ #include "KernelGenerator.h" -#include "ClTensorRegistry.h" #include "ClFunction.h" #include "TensorManager.h" -#include "open_cl/selectors/ConvolutionSelector.h" -#include "open_cl/selectors/DwConvolutionSelector.h" -#include "open_cl/selectors/SimpleSelectors.h" +#include "tensorflow/lite/delegates/gpu/common/shape.h" +#include "tensorflow/lite/delegates/gpu/cl/tensor.h" +#include "tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.h" +#include "tensorflow/lite/delegates/gpu/cl/selectors/dw_convolution_selector.h" +#include "tensorflow/lite/delegates/gpu/cl/selectors/simple_selectors.h" #include "ir/Operations.h" #include "ir/Operations.Include.h" @@ -37,6 +38,9 @@ #include "util/logging.h" #include "util/Utils.h" +using namespace tflite::gpu; +using namespace tflite::gpu::cl; + namespace onert { namespace backend @@ -60,14 +64,14 @@ void UpdatePadding(const ir::PaddingType type, const BHWC &input_shape, AttrT *a } } -gpu_cl::PoolingType convertPoolType(ir::operation::Pool2D::PoolType type_ir) +PoolingType convertPoolType(ir::operation::Pool2D::PoolType type_ir) { switch (type_ir) { case ir::operation::Pool2D::PoolType::AVG: - return gpu_cl::PoolingType::AVERAGE; + return PoolingType::AVERAGE; case ir::operation::Pool2D::PoolType::MAX: - return gpu_cl::PoolingType::MAX; + return PoolingType::MAX; default: throw std::runtime_error("gpu_Cl KernelGenerator : Not supported operation yet"); } @@ -75,7 +79,7 @@ gpu_cl::PoolingType convertPoolType(ir::operation::Pool2D::PoolType type_ir) KernelGenerator::KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder, - const std::shared_ptr<ClTensorRegistry<TensorManager>> &tensor_reg, + const std::shared_ptr<TensorRegistry> &tensor_reg, const std::shared_ptr<CreationContext> &creation_context) : basic::KernelGeneratorBase{graph}, _ctx(graph.operands()), _operations_ctx(graph.operations()), _current_layout{graph.layout()}, @@ -190,7 +194,7 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) auto bias_tensor = _tensor_reg->getClTensor(bias); auto output_tensor = _tensor_reg->getClTensor(output); - gpu_cl::Convolution2DAttributes attr; + Convolution2DAttributes attr; attr.strides = ToHW(param.stride.vertical, param.stride.horizontal); attr.dilations = HW(std::max(static_cast<u_int32_t>(1), param.dilation.height_factor), std::max(static_cast<u_int32_t>(1), param.dilation.width_factor)); @@ -237,7 +241,7 @@ void KernelGenerator::visit(const ir::operation::Conv2D &node) { std::unique_ptr<GPUOperation> gpu_op_1; OperationDef op_def_1; - std::shared_ptr<Tensor> new_tensor = std::make_shared<Tensor>(); + std::shared_ptr<cl::Tensor> new_tensor = std::make_shared<cl::Tensor>(); _new_tensors[output] = new_tensor; if (!CreateTensor(*_creation_context->context, output_shape, @@ -334,9 +338,9 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) const int filter_width = ker_shape.w; const int output_depth = out_shape.c; - InternalTensor<OHWI, DataType::FLOAT32> 
weights; + tflite::gpu::Tensor<OHWI, DataType::FLOAT32> weights; weights.id = attr.weights.id; - weights.shape = OHWI(output_depth, filter_height, filter_width, input_depth); + weights.shape = tflite::gpu::OHWI(output_depth, filter_height, filter_width, input_depth); weights.data.resize(weights.shape.DimensionsProduct()); float *dst = &weights.data[0]; for (int j = 0; j < output_depth; ++j) @@ -387,7 +391,7 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node) { std::unique_ptr<GPUOperation> gpu_op_1; OperationDef op_def_1; - std::shared_ptr<Tensor> new_tensor = std::make_shared<Tensor>(); + std::shared_ptr<cl::Tensor> new_tensor = std::make_shared<cl::Tensor>(); _new_tensors[ofm_index] = new_tensor; if (!CreateTensor(*_creation_context->context, out_shape, diff --git a/runtime/onert/backend/gpu_cl/KernelGenerator.h b/runtime/onert/backend/gpu_cl/KernelGenerator.h index 3e341b111..91fd3cd9d 100644 --- a/runtime/onert/backend/gpu_cl/KernelGenerator.h +++ b/runtime/onert/backend/gpu_cl/KernelGenerator.h @@ -17,11 +17,13 @@ #ifndef __ONERT_BACKEND_GPU_CL_KERNEL_GENERATOR_H__ #define __ONERT_BACKEND_GPU_CL_KERNEL_GENERATOR_H__ -#include "ClTensorRegistry.h" +#include "TensorRegistry.h" #include "backend/basic/TensorRegistry.h" #include "TensorBuilder.h" #include "TensorManager.h" +#include "tensorflow/lite/delegates/gpu/api.h" + #include <backend/CustomKernelBuilder.h> #include <backend/basic/KernelGeneratorBase.h> #include <ir/Operands.h> @@ -39,8 +41,8 @@ class KernelGenerator : public basic::KernelGeneratorBase { public: KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder, - const std::shared_ptr<ClTensorRegistry<TensorManager>> &tensor_reg, - const std::shared_ptr<CreationContext> &creation_context); + const std::shared_ptr<TensorRegistry> &tensor_reg, + const std::shared_ptr<tflite::gpu::cl::CreationContext> &creation_context); std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex ind) override; @@ -58,9 +60,9 @@ private: const ir::Operations &_operations_ctx; ir::Layout _current_layout; std::shared_ptr<TensorBuilder> _tensor_builder; - std::shared_ptr<ClTensorRegistry<TensorManager>> _tensor_reg; - std::shared_ptr<CreationContext> _creation_context; - ir::OperandIndexMap<std::shared_ptr<Tensor>> _new_tensors; + std::shared_ptr<TensorRegistry> _tensor_reg; + std::shared_ptr<tflite::gpu::cl::CreationContext> _creation_context; + ir::OperandIndexMap<std::shared_ptr<tflite::gpu::cl::Tensor>> _new_tensors; }; } // namespace gpu_cl diff --git a/runtime/onert/backend/gpu_cl/MemoryManager.h b/runtime/onert/backend/gpu_cl/MemoryManager.h new file mode 100644 index 000000000..a3b9b39de --- /dev/null +++ b/runtime/onert/backend/gpu_cl/MemoryManager.h @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_MEMORY_MANAGER_H__
+#define __ONERT_BACKEND_GPU_CL_MEMORY_MANAGER_H__
+
+#include "ex/InferenceContextEx.h"
+#include "operand/CLTensor.h"
+
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandInfo.h"
+#include "util/logging.h"
+
+#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
+#include "tensorflow/lite/delegates/gpu/common/status.h"
+#include "tensorflow/lite/delegates/gpu/cl/storage_type_util.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
+
+#include <cassert>
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+class MemoryManager
+{
+public:
+  MemoryManager(tflite::gpu::cl::CLContext *context) : _context{context} {}
+
+  ~MemoryManager() = default;
+
+  void allocate(void)
+  {
+    for (const auto &tensor_entry : _tensors)
+    {
+      auto tensor = tensor_entry.second;
+      auto type = tensor->get_type();
+
+      const auto &t = tensor_reserver_.Get(tensor_entry.first.value());
+      const auto &shape = t->shape;
+      const auto &descriptor = t->descriptor;
+      if (!CreateTensor(*_context, shape, descriptor, tensor->handle()).ok())
+      {
+        throw std::runtime_error("Failed to CreateTensor");
+      }
+      switch (type)
+      {
+        case TensorType::TENSOR_TYPE_INPUT:
+          tensor->writeConvertInit();
+          break;
+        case TensorType::TENSOR_TYPE_OUTPUT:
+          tensor->readConvertInit();
+          break;
+        default:
+          break;
+      }
+    }
+  }
+
+  void deallocate(void)
+  {
+    // NYI
+  }
+
+  void startLifetime(const ir::OperandIndex &)
+  { /* DO NOTHING */
+  }
+  void finishLifetime(const ir::OperandIndex &)
+  { /* DO NOTHING */
+  }
+
+  void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+                   tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
+                   std::shared_ptr<tflite::gpu::cl::Environment> environment,
+                   tflite::gpu::cl::DeviceInfo &device_info, TensorType type)
+  {
+    tflite::gpu::ValueId max_id = 0;
+    auto data_type = DeduceDataTypeFromPrecision(create_info.precision);
+    const auto shape = info.shape();
+
+    auto tensor = std::make_shared<operand::CLTensor>(shape.rank(), shape, environment, type);
+    _tensors[ind] = tensor;
+    tflite::gpu::BHWC t_shape;
+    switch (shape.rank())
+    {
+      case 1:
+        // B layout
+        t_shape = tflite::gpu::BHWC(shape.dim(0), 1, 1, 1);
+        break;
+      case 2:
+        // BC layout
+        t_shape = tflite::gpu::BHWC(shape.dim(0), 1, 1, shape.dim(1));
+        break;
+      case 3:
+        // BWC layout
+        t_shape = tflite::gpu::BHWC(shape.dim(0), 1, shape.dim(1), shape.dim(2));
+        break;
+      case 4:
+        // BHWC layout
+        t_shape = tflite::gpu::BHWC(shape.dim(0), shape.dim(1), shape.dim(2), shape.dim(3));
+        break;
+      default:
+        break;
+    }
+
+    tflite::gpu::cl::TensorStorageType storage_type = create_info.storage_type;
+    tflite::gpu::Layout layout =
+      t_shape.b == 1 ?
tflite::gpu::Layout::HWC : tflite::gpu::Layout::BHWC; + + tflite::gpu::ValueId id = ind.value(); + storage_type = + tflite::gpu::cl::SelectBestStorageType(device_info, t_shape, storage_type, data_type, layout); + auto dummy = std::make_shared<InferenceContextEx::DummyTensor>(); + dummy->shape = t_shape; + dummy->descriptor = tflite::gpu::cl::TensorDescriptor{data_type, storage_type, layout}; + tensor_reserver_.Add(id, dummy); + + max_id = std::max(max_id, id); + + tensor_reserver_.SetNext(max_id + 1); + } + + ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &tensors(void) { return _tensors; } + + InferenceContextEx::TensorReserverEx &tensorReservers(void) { return tensor_reserver_; } + +private: + ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> _tensors; + InferenceContextEx::TensorReserverEx tensor_reserver_; + tflite::gpu::cl::CLContext *_context; +}; + +} // namespace gpu_cl +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_GPU_CL_MEMORY_MANAGER_H__ diff --git a/runtime/onert/backend/gpu_cl/TensorBuilder.cc b/runtime/onert/backend/gpu_cl/TensorBuilder.cc new file mode 100644 index 000000000..e71733427 --- /dev/null +++ b/runtime/onert/backend/gpu_cl/TensorBuilder.cc @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include <memory>
+#include <queue>
+
+#include "TensorBuilder.h"
+
+#include "TensorManager.h"
+
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
+#include "tensorflow/lite/delegates/gpu/cl/inference_context.h"
+
+#include "ir/OperandIndexMap.h"
+#include "ir/OperandIndexSequence.h"
+#include <ir/Operands.h>
+#include <util/Utils.h>
+
+#include <cassert>
+#include <stack>
+
+#include "util/logging.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+
+using UsesType = cl_common::UsesType;
+
+TensorBuilder::TensorBuilder(const ir::Operands &operands, TensorManager *tensor_mgr,
+                             tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
+                             const std::shared_ptr<tflite::gpu::cl::Environment> &environment)
+  : _operands{operands}, _tensor_mgr{tensor_mgr}, _create_info{create_info}, _environment{
+                                                                               environment}
+{
+  assert(_tensor_mgr);
+}
+
+void TensorBuilder::registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+                                       ir::Layout backend_layout, TensorType type)
+{
+  assert(_tensor_mgr->constTensors().size() == 0);
+  assert(_tensor_mgr->nonconstTensors().size() == 0);
+
+  _uses_count_map[ind] = _operands.at(ind).getUses().size();
+
+  _tensor_info_map.emplace(ind, info);
+  _tensor_type_map.emplace(ind, type);
+
+  _tensor_layout_map.insert({ind, backend_layout});
+}
+
+void TensorBuilder::notifyFirstUse(const ir::OperandIndex &ind)
+{
+  _lifetime_seq.emplace_back(UsesType::FIRST, ind);
+}
+
+void TensorBuilder::notifyLastUse(const ir::OperandIndex &ind)
+{
+  _lifetime_seq.emplace_back(UsesType::LAST, ind);
+}
+
+bool TensorBuilder::isRegistered(const ir::OperandIndex &ind) const
+{
+  return _tensor_info_map.find(ind) != _tensor_info_map.end();
+}
+
+void TensorBuilder::prepare(void) { buildTensors(); }
+
+void TensorBuilder::allocate(void)
+{
+  auto lifetime_map = cl_common::createLifetimeMap(_lifetime_seq, _parent_map);
+
+  for (auto &entry : lifetime_map)
+  {
+    auto &use = entry.second;
+    auto use_type = use.first;
+    auto use_index = use.second;
+    assert(use_index.valid());
+    if (use_type == UsesType::FIRST)
+      _tensor_mgr->startLifetime(use_index);
+    else
+      _tensor_mgr->finishLifetime(use_index);
+  }
+
+  _tensor_mgr->allocateConsts();
+
+  // TODO `_parent_map` is filled for all Concat nodes, even ones this backend does not handle.
+  // After refactoring BackendContext we can uncomment this assert:
+  // assert(_tensor_info_map.size() ==
+  //        _tensor_mgr->nonconstTensors().size() + num of constants of _tensor_info_map +
+  //        _parent_map.size());
+  _tensor_mgr->allocateNonconsts();
+}
+
+void TensorBuilder::postFunctionPrepare(void) { _tensor_mgr->tryDeallocConstants(); }
+
+void TensorBuilder::buildTensors(void)
+{
+  assert(_tensor_mgr->constTensors().size() == 0);
+  assert(_tensor_mgr->nonconstTensors().size() == 0);
+  // Normal tensors
+  for (auto &entry : _tensor_info_map)
+  {
+    auto ind = entry.first;
+    if (_parent_map.count(ind) > 0)
+      continue;
+    auto type = _tensor_type_map.at(ind);
+    const auto &info = entry.second;
+    _tensor_mgr->buildTensor(ind, info, _create_info, _environment, _environment->device().info_,
+                             type);
+  }
+}
+
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/TensorBuilder.h b/runtime/onert/backend/gpu_cl/TensorBuilder.h
index d55358191..2a5cb8b5e 100644
--- a/runtime/onert/backend/gpu_cl/TensorBuilder.h
+++
b/runtime/onert/backend/gpu_cl/TensorBuilder.h
@@ -17,10 +17,13 @@
 #ifndef __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_H__
 #define __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_H__
-#include <backend/basic/TensorBuilder.h>
-#include "operand/ICLTensor.h"
-#include "operand/CLTensor.h"
-#include "ClTensorBuilder.h"
+#include "TensorManager.h"
+
+#include <cl_common/LifetimeMap.h>
+#include <cl_common/ParentInfo.h>
+
+#include <ir/Operands.h>
+#include <ir/OperandIndexSequence.h>
 namespace onert
 {
@@ -28,8 +31,76 @@ namespace backend
 {
 namespace gpu_cl
 {
+class TensorBuilder
+{
+public:
+  TensorBuilder(const ir::Operands &operands, TensorManager *tensor_mgr,
+                tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info,
+                const std::shared_ptr<tflite::gpu::cl::Environment> &environment);
+
+  /**
+   * @brief Register tensor information to allocate on the gpu_cl backend
+   * @param[in] ind    Operand index
+   * @param[in] info   Tensor information
+   * @param[in] layout Tensor data layout
+   */
+  void registerTensorInfo(const ir::OperandIndex &ind, const ir::OperandInfo &info,
+                          ir::Layout backend_layout, TensorType type);
+
+  void notifyFirstUse(const ir::OperandIndex &);
+  void notifyLastUse(const ir::OperandIndex &);
+
+  bool isRegistered(const ir::OperandIndex &) const;
+
+  void prepare();
+  void allocate();
+  void postFunctionPrepare();
+
+  TensorManager *cl_tensor_manager(void) { return _tensor_mgr.get(); }
+
+  void setUsesCount(const ir::OperandIndex &index, size_t num_uses)
+  {
+    assert(_uses_count_map.find(index) != _uses_count_map.end() ? _uses_count_map[index] == num_uses
+                                                                : true);
+    _uses_count_map[index] = num_uses;
+  }
+
+  void parent_map(std::unordered_map<ir::OperandIndex, cl_common::ParentInfo> &&parent_map)
+  {
+    _parent_map = std::move(parent_map);
+  }
+
+  bool areSubTensorsOf(const ir::OperandIndex &parent, const ir::OperandIndexSequence &seq);
+
+  /**
+   * @brief Check whether a child tensor is allocated as a subtensor of a parent tensor
+   * @param[in] parent Index of parent
+   * @param[in] child  Index of child
+   * @return @c true if child is allocated as subtensor of parent, otherwise @c false
+   */
+  bool isSubTensorOf(const ir::OperandIndex &parent, const ir::OperandIndex &child);
+
+private:
+  void buildTensors(void);
+  ir::OperandIndex findRootParent(ir::OperandIndex index);
+
+private:
+  const ir::Operands &_operands;
+  ir::OperandIndexMap<ir::OperandInfo> _tensor_info_map;
+  ir::OperandIndexMap<ir::Layout> _tensor_layout_map;
+  ir::OperandIndexMap<TensorType> _tensor_type_map;
+  ir::OperandIndexMap<size_t> _uses_count_map;
+
+  std::unique_ptr<TensorManager> _tensor_mgr;
+  tflite::gpu::cl::InferenceContext::CreateInferenceInfo _create_info;
+  std::shared_ptr<tflite::gpu::cl::Environment> _environment;
+
+  // for linear executor
+  cl_common::LifetimeSeq _lifetime_seq;
-using TensorBuilder = ClTensorBuilder<operand::ICLTensor, operand::CLTensor>;
+  // Extra info for concat elimination
+  ir::OperandIndexMap<cl_common::ParentInfo> _parent_map;
+};
 } // namespace gpu_cl
 } // namespace backend
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvCommon.h b/runtime/onert/backend/gpu_cl/TensorBuilderHelper.h
index 4700381dc..7290ff5da 100644
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvCommon.h
+++ b/runtime/onert/backend/gpu_cl/TensorBuilderHelper.h
@@ -1,12 +1,11 @@
 /*
  * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -15,8 +14,11 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_COMMON_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_COMMON_H__ +#ifndef __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__ +#define __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__ + +#include "absl/status/status.h" +#include "tensorflow/lite/delegates/gpu/common/shape.h" namespace onert { @@ -25,20 +27,18 @@ namespace backend namespace gpu_cl { -enum class ConvWeightsLayout +enum TensorType { - kUnknown, - kOHWIOGroupI4O4, + TENSOR_TYPE_VALID = 0, + TENSOR_TYPE_INPUT = 1, + TENSOR_TYPE_OUTPUT = 2, + TENSOR_TYPE_DELETE = 3 }; -struct ConvWeightsDescription -{ - ConvWeightsLayout layout; - int output_group_size; -}; +absl::Status ExtractAxisFromIndex(int dims, int index, tflite::gpu::Axis *axis); } // namespace gpu_cl } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_COMMON_H__ +#endif // __ONERT_BACKEND_GPU_CL_TENSOR_BUILDER_HELPER_H__ diff --git a/runtime/onert/backend/gpu_cl/TensorManager.cc b/runtime/onert/backend/gpu_cl/TensorManager.cc new file mode 100644 index 000000000..9fe0605ac --- /dev/null +++ b/runtime/onert/backend/gpu_cl/TensorManager.cc @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "TensorManager.h" + +#include <util/logging.h> + +#include <cassert> + +namespace onert +{ +namespace backend +{ +namespace gpu_cl +{ + +TensorManager::TensorManager(MemoryManager *const_mgr, MemoryManager *nonconst_mgr) + : _const_mgr{const_mgr}, _nonconst_mgr{nonconst_mgr} +{ + // DO NOTHING +} + +void TensorManager::allocateConsts(void) { _const_mgr->allocate(); } + +void TensorManager::allocateNonconsts(void) { _nonconst_mgr->allocate(); } + +void TensorManager::deallocateConsts(void) { _const_mgr->deallocate(); } + +void TensorManager::deallocateNonconsts(void) { _nonconst_mgr->deallocate(); } + +void TensorManager::buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info, + tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info, + std::shared_ptr<tflite::gpu::cl::Environment> environment, + tflite::gpu::cl::DeviceInfo &device_info, TensorType type) +{ + assert(_ind_to_mgr.find(ind) == _ind_to_mgr.end()); + + if (info.isConstant()) + { + _const_mgr->buildTensor(ind, info, create_info, environment, device_info, type); + _ind_to_mgr.insert({ind, *_const_mgr}); + } + else + { + _nonconst_mgr->buildTensor(ind, info, create_info, environment, device_info, type); + _ind_to_mgr.insert({ind, *_nonconst_mgr}); + } +} + +void TensorManager::startLifetime(const ir::OperandIndex &ind) +{ + assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end()); + _ind_to_mgr.at(ind).startLifetime(ind); +} + +void TensorManager::finishLifetime(const ir::OperandIndex &ind) +{ + assert(_ind_to_mgr.find(ind) != _ind_to_mgr.end()); + _ind_to_mgr.at(ind).finishLifetime(ind); +} + +std::shared_ptr<operand::ICLTensor> TensorManager::at(const ir::OperandIndex &ind) +{ + if (_ind_to_mgr.find(ind) == _ind_to_mgr.end()) + return nullptr; + + auto &tensors = _ind_to_mgr.at(ind).tensors(); + if (tensors.find(ind) != tensors.end()) + { + return tensors.at(ind); + } + + return nullptr; +} + +ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &TensorManager::constTensors(void) +{ + return _const_mgr->tensors(); +} + +ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &TensorManager::nonconstTensors(void) +{ + return _nonconst_mgr->tensors(); +} + +std::shared_ptr<InferenceContextEx::DummyTensor> TensorManager::atR(const ir::OperandIndex &ind) +{ + if (_nonconst_mgr->tensorReservers().HaveTensor(ind.value())) + { + return _nonconst_mgr->tensorReservers().Get(ind.value()); + } + else if (_const_mgr->tensorReservers().HaveTensor(ind.value())) + { + return _const_mgr->tensorReservers().Get(ind.value()); + } + return nullptr; +} + +InferenceContextEx::TensorReserverEx &TensorManager::constTensorReservers(void) +{ + return _const_mgr->tensorReservers(); +} + +InferenceContextEx::TensorReserverEx &TensorManager::nonconstTensorReservers(void) +{ + return _nonconst_mgr->tensorReservers(); +} + +void TensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn) +{ + for (auto it : _nonconst_mgr->tensors()) + fn(it.first); + + for (auto it : _const_mgr->tensors()) + fn(it.first); +} + +void TensorManager::tryDeallocConstants(void) +{ + // NYI +} + +} // namespace gpu_cl +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/TensorManager.h b/runtime/onert/backend/gpu_cl/TensorManager.h index 111b5f8a7..52abc579a 100644 --- a/runtime/onert/backend/gpu_cl/TensorManager.h +++ b/runtime/onert/backend/gpu_cl/TensorManager.h @@ -14,15 +14,16 @@ * limitations under the License. 
*/ -#ifndef __ONERT_BACKEND_CL_TENSOR_MANAGER_H__ -#define __ONERT_BACKEND_CL_TENSOR_MANAGER_H__ +#ifndef __ONERT_BACKEND_GPU_CL_TENSOR_MANAGER_H__ +#define __ONERT_BACKEND_GPU_CL_TENSOR_MANAGER_H__ -#include "ClMemoryManager.h" -#include "ClTensorManager.h" -#include "open_cl/ClContext.h" -#include "operand/CLTensor.h" -#include "operand/ICLTensor.h" -#include "util/logging.h" +#include "MemoryManager.h" + +#include "tensorflow/lite/delegates/gpu/cl/inference_context.h" +#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h" + +#include "ir/OperandInfo.h" +#include "ir/OperandIndexMap.h" namespace onert { @@ -31,13 +32,50 @@ namespace backend namespace gpu_cl { -using MemoryManager = ClMemoryManager<operand::ICLTensor, operand::CLTensor>; +class TensorManager +{ +public: + TensorManager(MemoryManager *const_mgr, MemoryManager *nonconst_mgr); + + virtual ~TensorManager() = default; + + void allocateConsts(void); + void allocateNonconsts(void); + void deallocateConsts(void); + void deallocateNonconsts(void); + + void buildTensor(const ir::OperandIndex &ind, const ir::OperandInfo &info, + tflite::gpu::cl::InferenceContext::CreateInferenceInfo create_info, + std::shared_ptr<tflite::gpu::cl::Environment> environment, + tflite::gpu::cl::DeviceInfo &device_info, TensorType type); + + std::shared_ptr<operand::ICLTensor> findTensorAsParent(const ir::OperandIndex &ind); + + void startLifetime(const ir::OperandIndex &ind); + void finishLifetime(const ir::OperandIndex &ind); + + std::shared_ptr<operand::ICLTensor> at(const ir::OperandIndex &ind); + std::shared_ptr<InferenceContextEx::DummyTensor> atR(const ir::OperandIndex &ind); + + InferenceContextEx::TensorReserverEx &constTensorReservers(void); + InferenceContextEx::TensorReserverEx &nonconstTensorReservers(void); + + ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &constTensors(void); + ir::OperandIndexMap<std::shared_ptr<operand::CLTensor>> &nonconstTensors(void); + + void iterate(const std::function<void(const ir::OperandIndex &)> &fn); + + void tryDeallocConstants(void); -using TensorManager = ClTensorManager<operand::ICLTensor, operand::CLTensor>; +private: + std::unique_ptr<MemoryManager> _const_mgr; + std::unique_ptr<MemoryManager> _nonconst_mgr; + ir::OperandIndexMap<MemoryManager &> _ind_to_mgr; +}; -inline TensorManager *createTensorManager(CLContext *context) +inline TensorManager *createTensorManager(tflite::gpu::cl::CLContext *context) { - VERBOSE(createTensorManager) << "ClTensorManager" << std::endl; + VERBOSE(createTensorManager) << "GPU-CL TensorManager" << std::endl; return new TensorManager(new MemoryManager(context), new MemoryManager(context)); } @@ -45,4 +83,4 @@ inline TensorManager *createTensorManager(CLContext *context) } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_ACL_CL_TENSOR_MANAGER_H__ +#endif // __ONERT_BACKEND_GPU_CL_TENSOR_MANAGER_H__ diff --git a/runtime/onert/backend/gpu_cl/ClTensorRegistry.h b/runtime/onert/backend/gpu_cl/TensorRegistry.h index 1f0018bd1..6f17aff54 100644 --- a/runtime/onert/backend/gpu_cl/ClTensorRegistry.h +++ b/runtime/onert/backend/gpu_cl/TensorRegistry.h @@ -17,6 +17,8 @@ #ifndef __ONERT_BACKEND_GPU_CL_TENSOR_REGISTRY_H__ #define __ONERT_BACKEND_GPU_CL_TENSOR_REGISTRY_H__ +#include "TensorManager.h" + #include "backend/ITensorRegistry.h" namespace onert @@ -27,14 +29,14 @@ namespace gpu_cl { /** - * @brief Tensor registry class for acl backends + * @brief Tensor registry class for gpu-cl backends * - * This is implemented as a wrapper of 
AclTensorManager. + * This is implemented as a wrapper of TensorManager. */ -template <typename T_ClTensorManager> class ClTensorRegistry : public ITensorRegistry +class TensorRegistry : public ITensorRegistry { public: - ClTensorRegistry(T_ClTensorManager *tensor_mgr) : _tensor_mgr{tensor_mgr} {} + TensorRegistry(TensorManager *tensor_mgr) : _tensor_mgr{tensor_mgr} {} ITensor *getITensor(const ir::OperandIndex &ind) override { return _tensor_mgr->at(ind).get(); } @@ -45,7 +47,7 @@ public: auto getClTensorReserver(const ir::OperandIndex &ind) { return _tensor_mgr->atR(ind); } private: - T_ClTensorManager *_tensor_mgr; + TensorManager *_tensor_mgr; }; } // namespace gpu_cl diff --git a/runtime/onert/backend/gpu_cl/ex/InferenceContextEx.h b/runtime/onert/backend/gpu_cl/ex/InferenceContextEx.h new file mode 100644 index 000000000..f67387904 --- /dev/null +++ b/runtime/onert/backend/gpu_cl/ex/InferenceContextEx.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_GPU_CL_INFERENCE_CONTEXT_EX_H__ +#define __ONERT_BACKEND_GPU_CL_INFERENCE_CONTEXT_EX_H__ + +#include "tensorflow/lite/delegates/gpu/cl/inference_context.h" +#include "tensorflow/lite/delegates/gpu/common/model.h" +#include "absl/strings/str_cat.h" + +namespace onert +{ +namespace backend +{ +namespace gpu_cl +{ + +class InferenceContextEx : public tflite::gpu::cl::InferenceContext +{ +public: + struct DummyTensor + { + tflite::gpu::BHWC shape; + tflite::gpu::cl::TensorDescriptor descriptor; + + bool operator==(const DummyTensor &b) const + { + return shape == b.shape && descriptor == b.descriptor; + } + }; + + class TensorReserverEx + { + public: + tflite::gpu::ValueId Add(const std::shared_ptr<DummyTensor> &dummy) + { + reservations_[next_] = dummy; + return next_++; + } + void Add(tflite::gpu::ValueId id, const std::shared_ptr<DummyTensor> &dummy) + { + reservations_[id] = dummy; + } + void SetNext(tflite::gpu::ValueId id) { next_ = id; } + bool HaveTensor(tflite::gpu::ValueId id) + { + return reservations_.find(id) != reservations_.end(); + } + std::shared_ptr<DummyTensor> Get(tflite::gpu::ValueId id) { return reservations_[id]; } + + std::vector<std::pair<tflite::gpu::ValueId, tflite::gpu::cl::TensorDescriptor>> + GetTensorDescs() const + { + std::vector<std::pair<tflite::gpu::ValueId, tflite::gpu::cl::TensorDescriptor>> result; + for (auto &v : reservations_) + { + tflite::gpu::cl::TensorDescriptor desc = v.second->descriptor; + desc.shape.b = v.second->shape.b; + desc.shape.h = v.second->shape.h; + desc.shape.w = v.second->shape.w; + desc.shape.d = 1; + desc.shape.c = v.second->shape.c; + result.push_back({v.first, desc}); + } + return result; + } + + void Add(const std::vector<std::pair<tflite::gpu::ValueId, tflite::gpu::cl::TensorDescriptor>> + &tensors) + { + for (auto &v : tensors) + { + auto dummy = std::make_shared<DummyTensor>(); 
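+        // Rebuild each reservation from the flattened descriptor list; GetTensorDescs()
+        // above pins depth (d) to 1, so only b/h/w/c are restored here.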
+ dummy->descriptor = v.second; + dummy->shape.b = v.second.shape.b; + dummy->shape.h = v.second.shape.h; + dummy->shape.w = v.second.shape.w; + dummy->shape.c = v.second.shape.c; + Add(v.first, dummy); + } + } + + private: + // absl::flat_hash_map<ValueId, DummyTensor> reservations_; + std::unordered_map<tflite::gpu::ValueId, std::shared_ptr<DummyTensor>> reservations_; + tflite::gpu::ValueId next_ = 0; + }; +}; + +} // namespace gpu_cl +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_GPU_CL_INFERENCE_CONTEXT_EX_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/Api.cc b/runtime/onert/backend/gpu_cl/open_cl/Api.cc deleted file mode 100644 index 10bf87c38..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Api.cc +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Api.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -struct ObjectTypeGetter -{ - ObjectType operator()(absl::monostate) const { return ObjectType::UNKNOWN; } - ObjectType operator()(OpenClBuffer) const { return ObjectType::OPENCL_BUFFER; } - ObjectType operator()(OpenClTexture) const { return ObjectType::OPENCL_TEXTURE; } - ObjectType operator()(CpuMemory) const { return ObjectType::CPU_MEMORY; } -}; - -struct ObjectValidityChecker -{ - bool operator()(absl::monostate) const { return false; } - bool operator()(OpenClBuffer obj) const { return obj.memobj; } - bool operator()(OpenClTexture obj) const { return obj.memobj; } - bool operator()(CpuMemory obj) const - { - return obj.data != nullptr && obj.size_bytes > 0 && - (data_type == DataType::UNKNOWN || obj.size_bytes % SizeOf(data_type) == 0); - } - DataType data_type; -}; - -} // namespace - -bool IsValid(const ObjectDef &def) -{ - return def.data_type != DataType::UNKNOWN && def.data_layout != DataLayout::UNKNOWN && - def.object_type != ObjectType::UNKNOWN; -} - -ObjectType GetType(const TensorObject &object) { return absl::visit(ObjectTypeGetter{}, object); } - -bool IsValid(const TensorObjectDef &def) { return IsValid(def.object_def); } - -bool IsValid(const TensorObjectDef &def, const TensorObject &object) -{ - return GetType(object) == def.object_def.object_type && - absl::visit(ObjectValidityChecker{def.object_def.data_type}, object); -} - -bool IsObjectPresent(ObjectType type, const TensorObject &obj) -{ - switch (type) - { - case ObjectType::CPU_MEMORY: - return absl::holds_alternative<CpuMemory>(obj); - case ObjectType::OPENCL_BUFFER: - return absl::holds_alternative<OpenClBuffer>(obj); - case ObjectType::OPENCL_TEXTURE: - return absl::holds_alternative<OpenClTexture>(obj); - case ObjectType::UNKNOWN: - return false; - } - return false; -} - -uint32_t NumElements(const TensorObjectDef &def) -{ - const auto &d = def.dimensions; - switch (def.object_def.data_layout) - { - case DataLayout::BHWC: - return 
d.product(); - case DataLayout::HWDC4: - case DataLayout::HDWC4: - case DataLayout::DHWC4: - return d.b * d.h * d.w * AlignByN(d.c, 4); - case DataLayout::UNKNOWN: - return 0; - } - return 0; -} - -int GetPosition(const InferenceOptions &options, InferencePriority p) -{ - if (options.priority1 == p) - return 1; - if (options.priority2 == p) - return 2; - if (options.priority3 == p) - return 3; - return 4; // least important -} - -PriorityImportance GetRelativeImportance(const InferenceOptions &options, InferencePriority p1, - InferencePriority p2) -{ - int p1_position = GetPosition(options, p1); - int p2_position = GetPosition(options, p2); - if (p1_position == p2_position) - return PriorityImportance::UNKNOWN; - return p1_position < p2_position ? PriorityImportance::HIGHER : PriorityImportance::LOWER; -} - -bool IsValid(const InferenceOptions &options) -{ - if (options.usage == InferenceUsage::UNKNOWN) - { - return false; - } - if (options.priority1 == InferencePriority::UNKNOWN || - options.priority2 == InferencePriority::UNKNOWN || - options.priority3 == InferencePriority::UNKNOWN) - { - return false; - } - if (options.priority1 == InferencePriority::AUTO) - { - return false; - } - if (options.priority2 == InferencePriority::AUTO && options.priority3 != InferencePriority::AUTO) - { - return false; - } - if (options.priority1 == options.priority2 || options.priority1 == options.priority3) - { - return false; - } - if (options.priority2 == options.priority3 && options.priority2 != InferencePriority::AUTO) - { - return false; - } - return true; -} - -// Implementation note: this resolution logic is shared between GL and CL -// backends, but they might have their own logic. Thus, the function is defined -// here just for code re-use purposes. -void ResolveAutoPriority(InferenceOptions *options) -{ - // priority1 cannot be AUTO as it would make options invalid. - if (options->priority2 == InferencePriority::AUTO) - { - switch (options->priority1) - { - case InferencePriority::MIN_LATENCY: - options->priority2 = InferencePriority::MIN_MEMORY_USAGE; - options->priority3 = InferencePriority::MAX_PRECISION; - return; - case InferencePriority::MIN_MEMORY_USAGE: - options->priority2 = InferencePriority::MAX_PRECISION; - options->priority3 = InferencePriority::MIN_LATENCY; - return; - case InferencePriority::MAX_PRECISION: - options->priority2 = InferencePriority::MIN_LATENCY; - options->priority3 = InferencePriority::MIN_MEMORY_USAGE; - return; - case InferencePriority::UNKNOWN: - case InferencePriority::AUTO: - // Invalid and unreachable option. - return; - } - } - - if (options->priority3 == InferencePriority::AUTO) - { - // Simply add the missing priority - if (GetPosition(*options, InferencePriority::MIN_LATENCY) == 4) - { - options->priority3 = InferencePriority::MIN_LATENCY; - } - else if (GetPosition(*options, InferencePriority::MAX_PRECISION) == 4) - { - options->priority3 = InferencePriority::MAX_PRECISION; - } - else if (GetPosition(*options, InferencePriority::MIN_MEMORY_USAGE) == 4) - { - options->priority3 = InferencePriority::MIN_MEMORY_USAGE; - } - } -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert
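[Editor's sketch, not part of the patch] A quick worked example of the priority helpers above. With priority1 = MIN_LATENCY, priority2 = MAX_PRECISION, priority3 = AUTO:

    GetPosition(options, InferencePriority::MIN_LATENCY) == 1
    GetPosition(options, InferencePriority::MAX_PRECISION) == 2
    GetPosition(options, InferencePriority::MIN_MEMORY_USAGE) == 4  // missing -> least important
    GetRelativeImportance(options, InferencePriority::MIN_LATENCY,
                          InferencePriority::MAX_PRECISION) == PriorityImportance::HIGHER

and IsValid(options) holds, matching the "VALID" combinations documented in Api.h below.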
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Api.h b/runtime/onert/backend/gpu_cl/open_cl/Api.h deleted file mode 100644 index 35be3d99c..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Api.h +++ /dev/null @@ -1,359 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_API_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_API_H__ - -// Usage example: -// -// // Builder is created from a model using GPU-specific parameters. -// std::unique_ptr<InferenceBuilder> builder = ...; -// -// // input data is coming from a texture -// // output data goes to CPU -// builder->SetInputObjectDef(0, {DataType::FLOAT16, DataLayout::DHWC4, -// ObjectType::OPENCL_TEXTURE, true}); -// builder->SetOutputObjectDef(0, {DataType::FLOAT32, DataLayout::BHWC, -// ObjectType::CPU_MEMORY, false}); -// std::unique_ptr<InferenceRunner> runner; -// RETURN_IF_ERROR(builder->Build(&runner)); // may take significant time. -// RETURN_IF_ERROR( -// runner->SetInputObject(0, OpenClTexture{texture_mem})); -// RETURN_IF_ERROR(runner->Run()); - -#include <cstdint> -#include <memory> -#include <vector> - -#include "absl/types/span.h" -#include "absl/types/variant.h" -#include "DataType.h" -#include "Status.h" -#include "Util.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -// Common abbreviations: -// B - batch -// H - height -// W - width -// C - channels -// D - depth := DivideRoundUp(C, 4) -// C4 - the constant 4. -enum class DataLayout -{ - UNKNOWN, - BHWC, - DHWC4, - HWDC4, - HDWC4, -}; - -enum class ObjectType -{ - UNKNOWN, - CPU_MEMORY, - OPENCL_TEXTURE, - OPENCL_BUFFER, -}; - -struct OpenClBuffer -{ - OpenClBuffer() = default; - explicit OpenClBuffer(cl_mem new_memobj) : memobj(new_memobj) {} - - cl_mem memobj = nullptr; -}; - -struct OpenClTexture -{ - OpenClTexture() = default; - explicit OpenClTexture(cl_mem new_memobj) : memobj(new_memobj) {} - - cl_mem memobj = nullptr; - // TODO(akulik): should it specify texture format? -}; - -struct CpuMemory -{ - CpuMemory() = default; - CpuMemory(void *new_data, size_t new_size_bytes) : data(new_data), size_bytes(new_size_bytes) {} - - void *data = nullptr; - size_t size_bytes = 0; -}; - -template <typename T> inline CpuMemory MakeCpuMemory(absl::Span<T> t) -{ - CpuMemory m; - m.data = t.data(); - m.size_bytes = t.size() * sizeof(T); - return m; -} - -template <typename T> inline CpuMemory MakeReadableCpuMemory(absl::Span<const T> t) -{ - CpuMemory m; - m.data = const_cast<T *>(t.data()); - m.size_bytes = t.size() * sizeof(T); - return m; -} - -// Defines object representation. -struct ObjectDef -{ - DataType data_type = DataType::UNKNOWN; - DataLayout data_layout = DataLayout::UNKNOWN; - ObjectType object_type = ObjectType::UNKNOWN; - - // If true, then object is managed externally and needs to be provided to - // InferenceRunner by a user before running inference. - // - // User-provided objects will not be re-used internally for any purpose to - // lower overall memory usage.
- bool user_provided = false; - - bool operator==(const ObjectDef &other) const - { - return data_type == other.data_type && data_layout == other.data_layout && - object_type == other.object_type && user_provided == other.user_provided; - } -}; - -bool IsValid(const ObjectDef &def); - -struct Dimensions -{ - Dimensions() : b(1), h(1), w(1), c(1) {} - - Dimensions(int32_t batch, int32_t height, int32_t width, int32_t channels) - : b(batch), h(height), w(width), c(channels) - { - } - - int32_t d() const { return DivideRoundUp(c, 4); } - - int32_t product() const { return b * h * w * c; } - - bool operator==(const Dimensions &other) const - { - return b == other.b && h == other.h && w == other.w && c == other.c; - } - - int32_t b; - int32_t h; - int32_t w; - int32_t c; -}; - -// Connects tensor shape with corresponding object definition. -struct TensorObjectDef -{ - // Dimensions semantics are defined by the corresponding DataLayout. - Dimensions dimensions; - ObjectDef object_def; - - bool operator==(const TensorObjectDef &other) const - { - return dimensions == other.dimensions && object_def == other.object_def; - } -}; - -// @return true if tensor object def is defined. -bool IsValid(const TensorObjectDef &def); - -// @return the number of elements in a tensor object. -uint32_t NumElements(const TensorObjectDef &def); - -using TensorObject = absl::variant<absl::monostate, CpuMemory, OpenClBuffer, OpenClTexture>; - -// @return true if object is set and corresponding values are defined. -bool IsValid(const TensorObjectDef &def, const TensorObject &object); - -ObjectType GetType(const TensorObject &object); - -// @return true if corresponding object is set for the given type -bool IsObjectPresent(ObjectType type, const TensorObject &obj); - -class InferenceRunner; - -// Allows inspecting and changing input and output definitions before a graph is -// prepared for inference. -class InferenceBuilder -{ -public: - virtual ~InferenceBuilder() {} - - // Returns inference graph input and output definitions. - virtual std::vector<TensorObjectDef> inputs() const = 0; - virtual std::vector<TensorObjectDef> outputs() const = 0; - - // Sets a new shape for the input if the underlying implementation and graph - // structure allow dynamic tensors. - virtual absl::Status SetInputShape(int index, const Dimensions &dimensions) = 0; - - // Updates object definitions for the given index. The implementation may allow - // different layouts and/or data type conversions between objects - // defined in a graph and the given objects, for example: - // input '0' is DataType::FLOAT32, DataLayout::BHWC. - // A user, however, has an input in DataType::FLOAT16, DataLayout::DHWC4. - // An implementation may allow this transformation to happen automatically - // under the hood. - virtual absl::Status SetInputObjectDef(int index, ObjectDef def) = 0; - virtual absl::Status SetOutputObjectDef(int index, ObjectDef def) = 0; - virtual absl::Status SetAllInputObjectDefsTo(ObjectDef def) - { - auto input_defs = inputs(); - for (size_t i = 0; i < input_defs.size(); ++i) - { - RETURN_IF_ERROR(SetInputObjectDef(i, def)); - } - return absl::OkStatus(); - } - virtual absl::Status SetAllOutputObjectDefsTo(ObjectDef def) - { - auto output_defs = outputs(); - for (size_t i = 0; i < output_defs.size(); ++i) - { - RETURN_IF_ERROR(SetOutputObjectDef(i, def)); - } - return absl::OkStatus(); - }
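[Editor's sketch, not part of the patch] For illustration, a caller feeding user-provided FP16/DHWC4 OpenCL buffers into a graph declared as FP32/BHWC could configure all inputs at once via the helper above; "builder" is assumed to exist:

    ObjectDef def;
    def.data_type = DataType::FLOAT16;
    def.data_layout = DataLayout::DHWC4;
    def.object_type = ObjectType::OPENCL_BUFFER;
    def.user_provided = true;
    RETURN_IF_ERROR(builder->SetAllInputObjectDefsTo(def));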
- - // Creates a new instance of the inference runner. InferenceBuilder stays valid - // and can be used to create another inference runner if needed. - // - // This method may take significant time to prepare a new inference runner. For - // example, it may require compiling OpenCL programs. - virtual absl::Status Build(std::unique_ptr<InferenceRunner> *runner) = 0; -}; - -// Runs a prepared inference. Every object marked as external needs to be set -// prior to calling the Run method. -class InferenceRunner -{ -public: - virtual ~InferenceRunner() {} - - // Returns inference graph input and output definitions. - virtual std::vector<TensorObjectDef> inputs() const = 0; - virtual std::vector<TensorObjectDef> outputs() const = 0; - - // Getters provide access to underlying objects for the given index. - // Setters set or change the external object for the given index. Note: - // objects need to match the object definitions set earlier in InferenceBuilder. - - virtual absl::Status GetInputObject(int index, TensorObject *object) = 0; - virtual absl::Status GetOutputObject(int index, TensorObject *object) = 0; - virtual absl::Status SetInputObject(int index, TensorObject object) = 0; - virtual absl::Status SetOutputObject(int index, TensorObject object) = 0; - - virtual absl::Status Run() = 0; -}; - -// Encapsulates compilation/runtime tradeoffs. -enum class InferenceUsage -{ - UNKNOWN, - - // InferenceRunner will be used only once. Therefore, it is important to - // minimize bootstrap time as well. - FAST_SINGLE_ANSWER, - - // Prefer maximizing the throughput. The same inference runner will be used - // repeatedly on different inputs. - SUSTAINED_SPEED, -}; - -// Defines aspects to control while instantiating a runner. -enum class InferencePriority -{ - UNKNOWN, - - AUTO, - - MIN_LATENCY, - - MAX_PRECISION, - - MIN_MEMORY_USAGE, -}; - -struct InferenceOptions -{ - InferenceUsage usage = InferenceUsage::SUSTAINED_SPEED; - - // Ordered priorities provide better understanding of desired semantics, - // where priority(n) is more important than priority(n+1). - // AUTO priority is needed when a single priority is the most important - // factor. For example, priority1 = InferencePriority::MIN_LATENCY and leaving - // everything else to AUTO would result in a configuration that achieves maximum - // performance. - // - // AUTO priority can only be used when higher priorities are fully specified. - // For example: - // VALID: priority1 = MIN_LATENCY, priority2 = AUTO, priority3 = AUTO - // VALID: priority1 = MIN_LATENCY, priority2 = MAX_PRECISION, - // priority3 = AUTO - // INVALID: priority1 = AUTO, priority2 = MIN_LATENCY, priority3 = AUTO - // INVALID: priority1 = MIN_LATENCY, priority2 = AUTO, - // priority3 = MAX_PRECISION - // Invalid priorities will result in an error. - InferencePriority priority1 = InferencePriority::MAX_PRECISION; - - InferencePriority priority2 = InferencePriority::AUTO; - - InferencePriority priority3 = InferencePriority::AUTO; -}; - -// Returns a position number for the priority. If priority is missing, -// then it would return 'max num priorities + 1'. -int GetPosition(const InferenceOptions &options, InferencePriority p); - -// Returns true if options are valid. -bool IsValid(const InferenceOptions &options); - -// Resolves AUTO priorities and specifies them explicitly. -// Note: no one should assume that these mappings will not change. -// Technically this function is declared here for code re-use purposes and -// it should by no means be treated as the canonical way to resolve AUTO. -void ResolveAutoPriority(InferenceOptions *options);
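[Editor's sketch, not part of the patch] A worked example of the AUTO resolution implemented in Api.cc above: starting from the defaults, setting only the top priority leaves the rest to be filled in deterministically:

    InferenceOptions options;
    options.usage = InferenceUsage::FAST_SINGLE_ANSWER;
    options.priority1 = InferencePriority::MIN_LATENCY;  // priority2/3 default to AUTO
    ResolveAutoPriority(&options);
    // Now: priority2 == MIN_MEMORY_USAGE, priority3 == MAX_PRECISION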
- -enum class PriorityImportance -{ - UNKNOWN, - HIGHER, - LOWER, -}; - -// If both p1 and p2 are not present in options, return UNKNOWN -// If p1 is present, but p2 is not, return HIGHER -// If p2 is present, but p1 is not, return LOWER -// If both are present, and p1 is more important, return HIGHER, otherwise, -// LOWER. -PriorityImportance GetRelativeImportance(const InferenceOptions &options, InferencePriority p1, - InferencePriority p2); -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_API_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/Arguments.cc b/runtime/onert/backend/gpu_cl/open_cl/Arguments.cc deleted file mode 100644 index a7f86bffc..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Arguments.cc +++ /dev/null @@ -1,926 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Arguments.h" - -#include "absl/strings/ascii.h" -#include "absl/strings/str_cat.h" -#include "absl/strings/str_replace.h" -#include "absl/strings/str_split.h" -#include "absl/strings/substitute.h" - -#include "AccessType.h" -#include "TensorType.h" -#include "DataType.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -namespace -{ - -bool IsWordSymbol(char symbol) { return absl::ascii_isalnum(symbol) || symbol == '_'; } - -std::string GetNextWord(const std::string &code, size_t first_position) -{ - size_t pos = first_position; - char t = code[pos]; - while (IsWordSymbol(t)) - { - pos++; - t = code[pos]; - } - return code.substr(first_position, pos - first_position); -} - -size_t FindEnclosingBracket(const std::string &text, size_t first_pos, char bracket) -{ - const std::map<char, char> brackets = { - {'(', ')'}, - {'{', '}'}, - {'[', ']'}, - {'<', '>'}, - }; - char b_open = bracket; - auto it = brackets.find(b_open); - if (it == brackets.end()) - { - return -1; - } - char b_close = it->second; - size_t pos = first_pos; - int opened = 1; - int closed = 0; - while (opened != closed && pos < text.size()) - { - if (text[pos] == b_open) - { - opened++; - } - else if (text[pos] == b_close) - { - closed++; - } - pos++; - } - if (opened == closed) - { - return pos; - } - else - { - return -1; - } -} - -absl::Status ParseArgsInsideBrackets(const std::string &text, size_t open_bracket_pos, - size_t *close_bracket_pos, std::vector<std::string> *args) -{ - *close_bracket_pos = FindEnclosingBracket(text, open_bracket_pos + 1, text[open_bracket_pos]); - if (*close_bracket_pos == static_cast<size_t>(-1)) - { - return absl::NotFoundError("Enclosing bracket not found"); - } - std::string str_args = - text.substr(open_bracket_pos + 1, *close_bracket_pos - open_bracket_pos - 2); - std::vector<absl::string_view> words = absl::StrSplit(str_args, ','); - args->reserve(words.size()); - for (const auto &word : words) - {
- absl::string_view arg = absl::StripAsciiWhitespace(word); - if (!arg.empty()) - { - args->push_back(std::string(arg)); - } - } - return absl::OkStatus(); -} - -void ReplaceAllWords(const std::string &old_word, const std::string &new_word, std::string *str) -{ - size_t position = str->find(old_word); - while (position != std::string::npos) - { - char prev = position == 0 ? '.' : (*str)[position - 1]; - char next = position + old_word.size() < str->size() ? (*str)[position + old_word.size()] : '.'; - if (IsWordSymbol(prev) || IsWordSymbol(next)) - { - position = str->find(old_word, position + 1); - continue; - } - str->replace(position, old_word.size(), new_word); - position = str->find(old_word, position + new_word.size()); - } -} - -std::string RenameArg(const std::vector<std::string> &object_names, const std::string &postfix, - const std::string &arg_name) -{ - for (const auto &object_name : object_names) - { - if (absl::StartsWith(arg_name, object_name) && arg_name.size() > object_name.size() && - arg_name[object_name.size()] == '_') - { - return object_name + postfix + - arg_name.substr(object_name.size(), arg_name.size() - object_name.size()); - } - } - return arg_name + postfix; -} - -void AppendArgument(const std::string &arg, std::string *args) -{ - if (!args->empty()) - { - absl::StrAppend(args, ",\n "); - } - absl::StrAppend(args, arg); -} - -std::string GetImageModifier(AccessType access) -{ - switch (access) - { - case AccessType::READ: - return "__read_only"; - case AccessType::WRITE: - return "__write_only"; - case AccessType::READ_WRITE: - return "__read_write"; - default: - throw std::runtime_error("Invalid AccessType"); - } -} - -std::string GetDefaultSamplers(const DeviceInfo &device_info) -{ - std::string result; - result += "__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | " - "CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n"; - if (device_info.IsAdreno3xx()) - { - // Unfortunately, CLK_ADDRESS_CLAMP is very slow on Adreno3xx and - // we can observe huge register overhead when compared to other modes. - - // While using CLK_ADDRESS_NONE with out-of-range image coordinates is - // undefined in the OpenCL specification, we have observed that - // CLK_ADDRESS_NONE works like CLK_ADDRESS_CLAMP for out-of-range image - // coordinates for RGBA F16/F32 textures on Adreno3xx devices. Using - // CLK_ADDRESS_NONE is significantly faster than CLK_ADDRESS_CLAMP on Adreno - // 3xx. 
- result += "__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | " - "CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n"; - } - else - { - result += "__constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | " - "CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n"; - } - - return result; -} - -} // namespace - -// Static -constexpr char Arguments::kArgsPrefix[]; - -Arguments::Arguments(Arguments &&args) - : int_values_(std::move(args.int_values_)), - shared_int4s_data_(std::move(args.shared_int4s_data_)), - float_values_(std::move(args.float_values_)), - shared_float4s_data_(std::move(args.shared_float4s_data_)), buffers_(std::move(args.buffers_)), - images2d_(std::move(args.images2d_)), image2d_arrays_(std::move(args.image2d_arrays_)), - images3d_(std::move(args.images3d_)), image_buffers_(std::move(args.image_buffers_)), - custom_memories_(std::move(args.custom_memories_)), object_refs_(std::move(args.object_refs_)), - objects_(std::move(args.objects_)) -{ -} -Arguments &Arguments::operator=(Arguments &&args) -{ - if (this != &args) - { - int_values_ = std::move(args.int_values_); - shared_int4s_data_ = std::move(args.shared_int4s_data_); - float_values_ = std::move(args.float_values_); - shared_float4s_data_ = std::move(args.shared_float4s_data_); - buffers_ = std::move(args.buffers_); - images2d_ = std::move(args.images2d_); - image2d_arrays_ = std::move(args.image2d_arrays_); - images3d_ = std::move(args.images3d_); - image_buffers_ = std::move(args.image_buffers_); - custom_memories_ = std::move(args.custom_memories_); - object_refs_ = std::move(args.object_refs_); - objects_ = std::move(args.objects_); - } - return *this; -} - -void Arguments::AddFloat(const std::string &name, float value) -{ - float_values_[name].value = value; -} -void Arguments::AddInt(const std::string &name, int value) { int_values_[name].value = value; } -void Arguments::AddBuffer(const std::string &name, const GPUBufferDescriptor &desc) -{ - buffers_[name] = desc; -} -void Arguments::AddImage2D(const std::string &name, const GPUImage2DDescriptor &desc) -{ - images2d_[name] = desc; -} - -void Arguments::AddImage2DArray(const std::string &name, const GPUImage2DArrayDescriptor &desc) -{ - image2d_arrays_[name] = desc; -} - -void Arguments::AddImage3D(const std::string &name, const GPUImage3DDescriptor &desc) -{ - images3d_[name] = desc; -} - -void Arguments::AddImageBuffer(const std::string &name, const GPUImageBufferDescriptor &desc) -{ - image_buffers_[name] = desc; -} - -void Arguments::AddCustomMemory(const std::string &name, const GPUCustomMemoryDescriptor &desc) -{ - custom_memories_[name] = desc; -} - -void Arguments::AddObjectRef(const std::string &name, AccessType access_type, - GPUObjectDescriptorPtr &&descriptor_ptr) -{ - descriptor_ptr->SetAccess(access_type); - object_refs_[name] = {std::move(descriptor_ptr)}; -} - -void Arguments::AddObject(const std::string &name, GPUObjectDescriptorPtr &&descriptor_ptr) -{ - descriptor_ptr->SetAccess(AccessType::READ); - objects_[name] = {nullptr, std::move(descriptor_ptr)}; -} - -void Arguments::AddGPUResources(const std::string &name, const GPUResources &resources) -{ - for (const auto &r : resources.ints) - { - AddInt(absl::StrCat(name, "_", r)); - } - for (const auto &r : resources.floats) - { - AddFloat(absl::StrCat(name, "_", r)); - } - for (const auto &r : resources.buffers) - { - AddBuffer(absl::StrCat(name, "_", r.first), r.second); - } - for (const auto &r : resources.images2d) - { - AddImage2D(absl::StrCat(name, "_", r.first), r.second); - } - for (const 
auto &r : resources.image2d_arrays) - { - AddImage2DArray(absl::StrCat(name, "_", r.first), r.second); - } - for (const auto &r : resources.images3d) - { - AddImage3D(absl::StrCat(name, "_", r.first), r.second); - } - for (const auto &r : resources.image_buffers) - { - AddImageBuffer(absl::StrCat(name, "_", r.first), r.second); - } - for (const auto &r : resources.custom_memories) - { - AddCustomMemory(absl::StrCat(name, "_", r.first), r.second); - } -} - -absl::Status Arguments::SetInt(const std::string &name, int value) -{ - auto it = int_values_.find(name); - if (it == int_values_.end()) - { - return absl::NotFoundError(absl::StrCat("No int argument with name - ", name)); - } - it->second.value = value; - if (it->second.active) - { - shared_int4s_data_[it->second.offset] = value; - } - return absl::OkStatus(); -} - -absl::Status Arguments::SetFloat(const std::string &name, float value) -{ - auto it = float_values_.find(name); - if (it == float_values_.end()) - { - return absl::NotFoundError(absl::StrCat("No float argument with name - ", name)); - } - it->second.value = value; - if (it->second.active) - { - shared_float4s_data_[it->second.offset] = value; - } - return absl::OkStatus(); -} - -absl::Status Arguments::SetImage2D(const std::string &name, cl_mem memory) -{ - auto it = images2d_.find(name); - if (it == images2d_.end()) - { - return absl::NotFoundError(absl::StrCat("No image2D argument with name - ", name)); - } - it->second.memory = memory; - return absl::OkStatus(); -} - -absl::Status Arguments::SetBuffer(const std::string &name, cl_mem memory) -{ - auto it = buffers_.find(name); - if (it == buffers_.end()) - { - return absl::NotFoundError(absl::StrCat("No buffer argument with name - ", name)); - } - it->second.memory = memory; - return absl::OkStatus(); -} - -absl::Status Arguments::SetImage2DArray(const std::string &name, cl_mem memory) -{ - auto it = image2d_arrays_.find(name); - if (it == image2d_arrays_.end()) - { - return absl::NotFoundError(absl::StrCat("No image2D array argument with name - ", name)); - } - it->second.memory = memory; - return absl::OkStatus(); -} - -absl::Status Arguments::SetImage3D(const std::string &name, cl_mem memory) -{ - auto it = images3d_.find(name); - if (it == images3d_.end()) - { - return absl::NotFoundError(absl::StrCat("No image3D argument with name - ", name)); - } - it->second.memory = memory; - return absl::OkStatus(); -} - -absl::Status Arguments::SetImageBuffer(const std::string &name, cl_mem memory) -{ - auto it = image_buffers_.find(name); - if (it == image_buffers_.end()) - { - return absl::NotFoundError(absl::StrCat("No image buffer argument with name - ", name)); - } - it->second.memory = memory; - return absl::OkStatus(); -} - -absl::Status Arguments::SetCustomMemory(const std::string &name, cl_mem memory) -{ - auto it = custom_memories_.find(name); - if (it == custom_memories_.end()) - { - return absl::NotFoundError(absl::StrCat("No custom memory argument with name - ", name)); - } - it->second.memory = memory; - return absl::OkStatus(); -} - -absl::Status Arguments::SetObjectRef(const std::string &name, const GPUObject *object) -{ - auto it = object_refs_.find(name); - if (it == object_refs_.end()) - { - return absl::NotFoundError(absl::StrCat("No object ref with name - ", name)); - } - GPUResourcesWithValue resources; - RETURN_IF_ERROR(object->GetGPUResources(it->second.descriptor.get(), &resources)); - return SetGPUResources(name, resources); -} - -absl::Status Arguments::SetGPUResources(const std::string &name, - const 
GPUResourcesWithValue &resources) -{ - for (const auto &r : resources.ints) - { - RETURN_IF_ERROR(SetInt(absl::StrCat(name, "_", r.first), r.second)); - } - for (const auto &r : resources.floats) - { - RETURN_IF_ERROR(SetFloat(absl::StrCat(name, "_", r.first), r.second)); - } - for (const auto &r : resources.buffers) - { - RETURN_IF_ERROR(SetBuffer(absl::StrCat(name, "_", r.first), r.second)); - } - for (const auto &r : resources.images2d) - { - RETURN_IF_ERROR(SetImage2D(absl::StrCat(name, "_", r.first), r.second)); - } - for (const auto &r : resources.image2d_arrays) - { - RETURN_IF_ERROR(SetImage2DArray(absl::StrCat(name, "_", r.first), r.second)); - } - for (const auto &r : resources.images3d) - { - RETURN_IF_ERROR(SetImage3D(absl::StrCat(name, "_", r.first), r.second)); - } - for (const auto &r : resources.image_buffers) - { - RETURN_IF_ERROR(SetImageBuffer(absl::StrCat(name, "_", r.first), r.second)); - } - for (const auto &r : resources.custom_memories) - { - RETURN_IF_ERROR(SetCustomMemory(absl::StrCat(name, "_", r.first), r.second)); - } - return absl::OkStatus(); -} -void Arguments::RenameArgs(const std::string &postfix, std::string *code) const -{ - size_t next_position = code->find(kArgsPrefix); - while (next_position != std::string::npos) - { - size_t arg_pos = next_position + strlen(kArgsPrefix); - std::string arg_name = GetNextWord(*code, arg_pos); - code->replace(arg_pos, arg_name.size(), arg_name + postfix); - next_position = code->find(kArgsPrefix, arg_pos + arg_name.size()); - } -} - -absl::Status Arguments::Merge(Arguments &&args, const std::string &postfix) -{ - std::vector<std::string> object_names; - object_names.reserve(args.object_refs_.size() + args.objects_.size()); - for (auto &v : args.object_refs_) - { - object_names.push_back(v.first); - const std::string name = v.first + postfix; - if (object_refs_.find(name) != object_refs_.end()) - { - return absl::InvalidArgumentError( - absl::StrCat("Object reference name collision. Name - ", name)); - } - object_refs_[name] = {std::move(v.second.descriptor)}; - } - for (auto &v : args.objects_) - { - object_names.push_back(v.first); - const std::string name = v.first + postfix; - if (objects_.find(name) != objects_.end()) - { - return absl::InvalidArgumentError(absl::StrCat("Object name collision. 
Name - ", name)); - } - objects_[name] = {std::move(v.second.obj_ptr), std::move(v.second.descriptor)}; - } - for (const auto &v : args.int_values_) - { - AddInt(RenameArg(object_names, postfix, v.first), v.second.value); - } - for (const auto &v : args.float_values_) - { - AddFloat(RenameArg(object_names, postfix, v.first), v.second.value); - } - for (const auto &v : args.buffers_) - { - AddBuffer(RenameArg(object_names, postfix, v.first), v.second); - } - for (const auto &v : args.images2d_) - { - AddImage2D(RenameArg(object_names, postfix, v.first), v.second); - } - for (const auto &v : args.image2d_arrays_) - { - AddImage2DArray(RenameArg(object_names, postfix, v.first), v.second); - } - for (const auto &v : args.images3d_) - { - AddImage3D(RenameArg(object_names, postfix, v.first), v.second); - } - for (const auto &v : args.image_buffers_) - { - AddImageBuffer(RenameArg(object_names, postfix, v.first), v.second); - } - for (const auto &v : args.custom_memories_) - { - AddCustomMemory(RenameArg(object_names, postfix, v.first), v.second); - } - return absl::OkStatus(); -} - -absl::Status Arguments::TransformToCLCode(const DeviceInfo &device_info, - const std::map<std::string, std::string> &linkables, - std::string *code) -{ - RETURN_IF_ERROR(AddObjectArgs()); - RETURN_IF_ERROR(ResolveSelectorsPass(linkables, code)); - ResolveArgsPass(device_info, code); - *code = absl::Substitute(*code, GetListOfArgs()); - *code = GetDefaultSamplers(device_info) + *code; - return absl::OkStatus(); -} - -std::string Arguments::GetListOfArgs() -{ - std::string result; - for (auto &t : buffers_) - { - const std::string type_name = t.second.data_type == DataType::FLOAT32 ? "float" : "half"; - std::string attributes; - for (const auto &attr : t.second.attributes) - { - attributes += absl::StrCat(" __attribute__((", attr, "))"); - } - AppendArgument(absl::StrCat(MemoryTypeToCLType(t.second.memory_type), " ", - ToCLDataType(t.second.data_type, t.second.element_size), "* ", - t.first, attributes), - &result); - } - for (auto &t : image_buffers_) - { - AppendArgument( - absl::StrCat(GetImageModifier(t.second.access_type), " image1d_buffer_t ", t.first), &result); - } - for (auto &t : images2d_) - { - AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type), " image2d_t ", t.first), - &result); - } - for (auto &t : image2d_arrays_) - { - AppendArgument( - absl::StrCat(GetImageModifier(t.second.access_type), " image2d_array_t ", t.first), &result); - } - for (auto &t : images3d_) - { - AppendArgument(absl::StrCat(GetImageModifier(t.second.access_type), " image3d_t ", t.first), - &result); - } - for (auto &t : custom_memories_) - { - AppendArgument(absl::StrCat(t.second.type_name, " ", t.first), &result); - } - for (uint32_t i = 0; i < shared_int4s_data_.size() / 4; ++i) - { - AppendArgument(absl::StrCat("int4 shared_int4_", i), &result); - } - for (uint32_t i = 0; i < shared_float4s_data_.size() / 4; ++i) - { - AppendArgument(absl::StrCat("float4 shared_float4_", i), &result); - } - return result; -} - -absl::Status Arguments::Bind(cl_kernel kernel, int offset) -{ - for (auto &t : buffers_) - { - const int error_code = clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ", - CLErrorCodeToString(error_code), "(at index - ", - offset, ")")); - } - offset++; - } - for (auto &t : image_buffers_) - { - const int error_code = clSetKernelArg(kernel, offset, sizeof(cl_mem), 
&t.second.memory); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ", - CLErrorCodeToString(error_code), "(at index - ", - offset, ")")); - } - offset++; - } - for (auto &t : images2d_) - { - const int error_code = clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ", - CLErrorCodeToString(error_code), "(at index - ", - offset, ")")); - } - offset++; - } - for (auto &t : image2d_arrays_) - { - const int error_code = clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ", - CLErrorCodeToString(error_code), "(at index - ", - offset, ")")); - } - offset++; - } - for (auto &t : images3d_) - { - const int error_code = clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ", - CLErrorCodeToString(error_code), "(at index - ", - offset, ")")); - } - offset++; - } - for (auto &t : custom_memories_) - { - const int error_code = clSetKernelArg(kernel, offset, sizeof(cl_mem), &t.second.memory); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ", - CLErrorCodeToString(error_code), "(at index - ", - offset, ")")); - } - offset++; - } - for (size_t i = 0; i < shared_int4s_data_.size() / 4; ++i) - { - const int error_code = - clSetKernelArg(kernel, offset, sizeof(int32_t) * 4, &shared_int4s_data_[i * 4]); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ", - CLErrorCodeToString(error_code), "(at index - ", - offset, ")")); - } - offset++; - } - for (size_t i = 0; i < shared_float4s_data_.size() / 4; ++i) - { - const int error_code = - clSetKernelArg(kernel, offset, sizeof(int32_t) * 4, &shared_float4s_data_[i * 4]); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ", - CLErrorCodeToString(error_code), "(at index - ", - offset, ")")); - } - offset++; - } - return absl::OkStatus(); -} - -std::string Arguments::AddActiveArgument(const std::string &arg_name, bool) -{ - { - auto it = int_values_.find(arg_name); - if (it != int_values_.end()) - { - int int_index; - if (it->second.active) - { - int_index = it->second.offset; - } - else - { - it->second.active = true; - it->second.offset = shared_int4s_data_.size(); - int_index = it->second.offset; - shared_int4s_data_.push_back(it->second.value); - } - std::string index = std::to_string(int_index / 4); - std::string postfixes[4] = {"x", "y", "z", "w"}; - return "shared_int4_" + index + "." + postfixes[int_index % 4]; - } - } - { - auto it = float_values_.find(arg_name); - if (it != float_values_.end()) - { - int float_index; - if (it->second.active) - { - float_index = it->second.offset; - } - else - { - it->second.active = true; - it->second.offset = shared_float4s_data_.size(); - float_index = it->second.offset; - shared_float4s_data_.push_back(it->second.value); - } - std::string index = std::to_string(float_index / 4); - std::string postfixes[4] = {"x", "y", "z", "w"}; - return "shared_float4_" + index + "." 
+ postfixes[float_index % 4]; - } - } - return arg_name; -} - -void Arguments::ResolveArgsPass(const DeviceInfo &device_info, std::string *code) -{ - bool use_f32_for_half_arguments = device_info.IsPowerVR(); - size_t position = 0; - size_t next_position = code->find(kArgsPrefix); - while (next_position != std::string::npos) - { - size_t arg_pos = next_position; - next_position += strlen(kArgsPrefix); - std::string object_name = GetNextWord(*code, next_position); - std::string new_name = AddActiveArgument(object_name, use_f32_for_half_arguments); - code->replace(arg_pos, object_name.size() + strlen(kArgsPrefix), new_name); - position = arg_pos + new_name.size(); - next_position = code->find(kArgsPrefix, position); - } - - int shared_int4s_aligned_size = AlignByN(shared_int4s_data_.size(), 4); - shared_int4s_data_.resize(shared_int4s_aligned_size); - int shared_float4s_aligned_size = AlignByN(shared_float4s_data_.size(), 4); - shared_float4s_data_.resize(shared_float4s_aligned_size); -} - -void Arguments::ResolveObjectNames(const std::string &object_name, - const std::vector<std::string> &member_names, std::string *code) -{ - for (const auto &member_name : member_names) - { - const std::string new_name = kArgsPrefix + object_name + "_" + member_name; - ReplaceAllWords(member_name, new_name, code); - } -} - -GPUObjectDescriptor *Arguments::GetObjectDescriptor(const std::string &object_name) const -{ - { - auto it = object_refs_.find(object_name); - if (it != object_refs_.end()) - { - return it->second.descriptor.get(); - } - } - { - auto it = objects_.find(object_name); - if (it != objects_.end()) - { - return it->second.descriptor.get(); - } - } - return nullptr; -} - -absl::Status Arguments::ResolveSelector(const std::map<std::string, std::string> &linkables, - const std::string &object_name, const std::string &selector, - const std::vector<std::string> &args, - const std::vector<std::string> &template_args, - std::string *result) -{ - const GPUObjectDescriptor *desc_ptr = GetObjectDescriptor(object_name); - if (!desc_ptr) - { - return absl::NotFoundError(absl::StrCat("No object with name - ", object_name)); - } - auto names = desc_ptr->GetGPUResources().GetNames(); - const auto *tensor_desc = dynamic_cast<const TensorDescriptor *>(desc_ptr); - if (tensor_desc && selector == "Write") - { - auto it = linkables.find(object_name); - if (it != linkables.end()) - { - if (desc_ptr->GetAccess() != AccessType::WRITE && - desc_ptr->GetAccess() != AccessType::READ_WRITE) - { - return absl::FailedPreconditionError( - absl::StrCat("Object with name - ", object_name, " should have Write access.")); - } - std::string value_name, x_coord, y_coord, s_coord; - RETURN_IF_ERROR(tensor_desc->GetLinkingContextFromWriteSelector(args, &value_name, &x_coord, - &y_coord, &s_coord)); - // x_coord can have batch size property of link_object - ResolveObjectNames(object_name, names, &x_coord); - *result = it->second; - ReplaceAllWords("in_out_value", value_name, result); - ReplaceAllWords("X_COORD", x_coord, result); - ReplaceAllWords("Y_COORD", y_coord, result); - ReplaceAllWords("S_COORD", s_coord, result); - RETURN_IF_ERROR(ResolveSelectorsPass({}, result)); - } - } - std::string patch; - RETURN_IF_ERROR(desc_ptr->PerformSelector(selector, args, template_args, &patch)); - ResolveObjectNames(object_name, names, &patch); - *result += patch; - return absl::OkStatus(); -} - -absl::Status Arguments::ResolveSelectorsPass(const std::map<std::string, std::string> &linkables, - std::string *code) -{ - std::string result; - 
size_t position = 0; - size_t next_position = code->find(kArgsPrefix); - while (next_position != std::string::npos) - { - size_t arg_pos = next_position; - next_position += strlen(kArgsPrefix); - std::string object_name = GetNextWord(*code, next_position); - char next = (*code)[next_position + object_name.size()]; - if (next == '.') - { - next_position += object_name.size() + 1; - std::string selector_name = GetNextWord(*code, next_position); - next_position += selector_name.size(); - next = (*code)[next_position]; - std::vector<std::string> template_args; - if (next == '<') - { - size_t close_bracket_pos; - RETURN_IF_ERROR( - ParseArgsInsideBrackets(*code, next_position, &close_bracket_pos, &template_args)); - next_position = close_bracket_pos; - next = (*code)[next_position]; - } - if (next != '(') - { - return absl::NotFoundError( - absl::StrCat("Expected ( after ", object_name, ".", selector_name, " call")); - } - std::vector<std::string> args; - size_t close_bracket_pos; - RETURN_IF_ERROR(ParseArgsInsideBrackets(*code, next_position, &close_bracket_pos, &args)); - for (auto &arg : args) - { - RETURN_IF_ERROR(ResolveSelectorsPass({}, &arg)); - } - std::string patch; - RETURN_IF_ERROR( - ResolveSelector(linkables, object_name, selector_name, args, template_args, &patch)); - code->replace(arg_pos, close_bracket_pos - arg_pos, patch); - position = arg_pos + patch.size(); - } - else - { - position = arg_pos + strlen(kArgsPrefix); - } - next_position = code->find(kArgsPrefix, position); - } - return absl::OkStatus(); -} - -absl::Status Arguments::AllocateObjects(CLContext *context) -{ - for (auto &t : objects_) - { - RETURN_IF_ERROR(t.second.descriptor->CreateGPUObject(context, &t.second.obj_ptr)); - } - return absl::OkStatus(); -} - -void Arguments::ReleaseCPURepresentation() -{ - for (auto &t : objects_) - { - t.second.descriptor->Release(); - } -} - -absl::Status Arguments::AddObjectArgs() -{ - for (auto &t : objects_) - { - AddGPUResources(t.first, t.second.descriptor->GetGPUResources()); - GPUResourcesWithValue resources; - RETURN_IF_ERROR(t.second.obj_ptr->GetGPUResources(t.second.descriptor.get(), &resources)); - RETURN_IF_ERROR(SetGPUResources(t.first, resources)); - } - for (auto &t : object_refs_) - { - AddGPUResources(t.first, t.second.descriptor->GetGPUResources()); - } - return absl::OkStatus(); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/Arguments.h b/runtime/onert/backend/gpu_cl/open_cl/Arguments.h deleted file mode 100644 index 0c6ce1edf..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Arguments.h +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_ARGUMENTS_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_ARGUMENTS_H__ - -#include <map> -#include <string> -#include <vector> - -#include "ClDevice.h" -#include "GpuObject.h" -#include "OpenclWrapper.h" - -#include "AccessType.h" -#include "Types.h" -#include "Util.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -class ArgumentsBinder -{ -public: - virtual absl::Status SetInt(const std::string &name, int value) = 0; - virtual absl::Status SetFloat(const std::string &name, float value) = 0; - virtual ~ArgumentsBinder() = default; -}; - -class Arguments : public ArgumentsBinder -{ -public: - Arguments() = default; - void AddFloat(const std::string &name, float value = 0.0f); - void AddInt(const std::string &name, int value = 0); - void AddObjectRef(const std::string &name, AccessType access_type, - GPUObjectDescriptorPtr &&descriptor_ptr); - void AddObject(const std::string &name, GPUObjectDescriptorPtr &&descriptor_ptr); - - absl::Status SetInt(const std::string &name, int value) override; - absl::Status SetFloat(const std::string &name, float value) override; - absl::Status SetObjectRef(const std::string &name, const GPUObject *object); - - absl::Status Bind(cl_kernel kernel, int offset = 0); - - void RenameArgs(const std::string &postfix, std::string *code) const; - absl::Status Merge(Arguments &&args, const std::string &postfix); - - absl::Status AllocateObjects(CLContext *context); - void ReleaseCPURepresentation(); - absl::Status TransformToCLCode(const DeviceInfo &device_info, - const std::map<std::string, std::string> &linkables, - std::string *code); - - // Move only - Arguments(Arguments &&args); - Arguments &operator=(Arguments &&args); - Arguments(const Arguments &) = delete; - Arguments &operator=(const Arguments &) = delete; - - ~Arguments() override = default; - -private: - void AddBuffer(const std::string &name, const GPUBufferDescriptor &desc); - void AddImage2D(const std::string &name, const GPUImage2DDescriptor &desc); - void AddImage2DArray(const std::string &name, const GPUImage2DArrayDescriptor &desc); - void AddImage3D(const std::string &name, const GPUImage3DDescriptor &desc); - void AddImageBuffer(const std::string &name, const GPUImageBufferDescriptor &desc); - void AddCustomMemory(const std::string &name, const GPUCustomMemoryDescriptor &desc); - - absl::Status SetImage2D(const std::string &name, cl_mem memory); - absl::Status SetBuffer(const std::string &name, cl_mem memory); - absl::Status SetImage2DArray(const std::string &name, cl_mem memory); - absl::Status SetImage3D(const std::string &name, cl_mem memory); - absl::Status SetImageBuffer(const std::string &name, cl_mem memory); - absl::Status SetCustomMemory(const std::string &name, cl_mem memory); - - std::string GetListOfArgs(); - - std::string AddActiveArgument(const std::string &arg_name, bool use_f32_for_halfs); - void AddGPUResources(const std::string &name, const GPUResources &resources); - - absl::Status SetGPUResources(const std::string &name, const GPUResourcesWithValue &resources); - - absl::Status AddObjectArgs(); - - void ResolveArgsPass(const DeviceInfo &device_info, std::string *code); - absl::Status ResolveSelectorsPass(const std::map<std::string, std::string> &linkables, - std::string *code); - - absl::Status ResolveSelector(const std::map<std::string, std::string> &linkables, - const std::string &object_name, const std::string &selector, - const std::vector<std::string> &args, - const 
std::vector<std::string> &template_args, std::string *result); - - void ResolveObjectNames(const std::string &object_name, - const std::vector<std::string> &member_names, std::string *code); - - GPUObjectDescriptor *GetObjectDescriptor(const std::string &object_name) const; - - static constexpr char kArgsPrefix[] = "args."; - - struct IntValue - { - int value; - - // Many uniforms are generated automatically and never used; to reduce - // the amount of data transferred, we add this optimization. - bool active = false; - - // Offset into the shared uniform storage. - uint32_t offset = -1; - }; - std::map<std::string, IntValue> int_values_; - std::vector<int32_t> shared_int4s_data_; - - struct FloatValue - { - float value; - - // Many uniforms are generated automatically and never used; to reduce - // the amount of data transferred, we add this optimization. - bool active = false; - - // Offset into the shared uniform storage. - uint32_t offset = -1; - }; - std::map<std::string, FloatValue> float_values_; - std::vector<float> shared_float4s_data_; - - std::map<std::string, GPUBufferDescriptor> buffers_; - std::map<std::string, GPUImage2DDescriptor> images2d_; - std::map<std::string, GPUImage2DArrayDescriptor> image2d_arrays_; - std::map<std::string, GPUImage3DDescriptor> images3d_; - std::map<std::string, GPUImageBufferDescriptor> image_buffers_; - std::map<std::string, GPUCustomMemoryDescriptor> custom_memories_; - - struct ObjectRefArg - { - GPUObjectDescriptorPtr descriptor; - }; - std::map<std::string, ObjectRefArg> object_refs_; - - struct ObjectArg - { - GPUObjectPtr obj_ptr; - GPUObjectDescriptorPtr descriptor; - }; - std::map<std::string, ObjectArg> objects_; -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_ARGUMENTS_H__
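[Editor's sketch, not part of the patch] How a scalar flows through the packed-uniform scheme backed by shared_float4s_data_ above; the kernel fragment and the name "alpha" are made up:

    Arguments args;
    args.AddFloat("alpha", 1.5f);               // stored inactive until referenced
    std::string code = "value *= args.alpha;";  // hypothetical kernel source
    // During TransformToCLCode, ResolveArgsPass rewrites "args.alpha" into a
    // packed access such as "shared_float4_0.x", GetListOfArgs appends
    // "float4 shared_float4_0" to the kernel argument list, and Bind() uploads
    // the packed value with clSetKernelArg.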
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Buffer.cc b/runtime/onert/backend/gpu_cl/open_cl/Buffer.cc deleted file mode 100644 index 64c071921..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Buffer.cc +++ /dev/null @@ -1,234 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Buffer.h" - -#include <string> - -#include "ClContext.h" -#include "DataType.h" -#include "GpuObject.h" -#include "Util.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -absl::Status CreateBuffer(size_t size_in_bytes, bool gpu_read_only, const void *data, - CLContext *context, Buffer *result) -{ - cl_mem buffer; - RETURN_IF_ERROR(CreateCLBuffer(context->context(), size_in_bytes, gpu_read_only, - const_cast<void *>(data), &buffer)); - *result = Buffer(buffer, size_in_bytes); - - return absl::OkStatus(); -} - -} // namespace - -BufferDescriptor::BufferDescriptor(BufferDescriptor &&desc) - : GPUObjectDescriptor(std::move(desc)), element_type(desc.element_type), - element_size(desc.element_size), memory_type(desc.memory_type), - attributes(std::move(desc.attributes)), size(desc.size), data(std::move(desc.data)) -{ -} - -BufferDescriptor &BufferDescriptor::operator=(BufferDescriptor &&desc) -{ - if (this != &desc) - { - std::swap(element_type, desc.element_type); - std::swap(element_size, desc.element_size); - std::swap(memory_type, desc.memory_type); - attributes = std::move(desc.attributes); - std::swap(size, desc.size); - data = std::move(desc.data); - GPUObjectDescriptor::operator=(std::move(desc)); - } - return *this; -} - -void BufferDescriptor::Release() { data.clear(); } - -GPUResources BufferDescriptor::GetGPUResources() const -{ - GPUResources resources; - GPUBufferDescriptor desc; - desc.data_type = element_type; - desc.access_type = access_type_; - desc.element_size = element_size; - desc.memory_type = memory_type; - desc.attributes = attributes; - resources.buffers.push_back({"buffer", desc}); - return resources; -} - -absl::Status BufferDescriptor::PerformSelector(const std::string &selector, - const std::vector<std::string> &args, - const std::vector<std::string> &template_args, - std::string *result) const -{ - if (selector == "Read") - { - return PerformReadSelector(args, result); - } - else if (selector == "GetPtr") - { - return PerformGetPtrSelector(args, template_args, result); - } - else - { - return absl::NotFoundError( - absl::StrCat("BufferDescriptor doesn't have a selector with name - ", selector)); - } -} - -absl::Status BufferDescriptor::PerformReadSelector(const std::vector<std::string> &args, - std::string *result) const -{ - if (args.size() != 1) - { - return absl::NotFoundError( - absl::StrCat("BufferDescriptor Read requires one argument, but ", args.size(), " was passed")); - } - *result = absl::StrCat("buffer[", args[0], "]"); - return absl::OkStatus(); -} - -absl::Status BufferDescriptor::PerformGetPtrSelector(const std::vector<std::string> &args, - const std::vector<std::string> &template_args, - std::string *result) const -{ - if (args.size() > 1) - { - return absl::NotFoundError(absl::StrCat( - "BufferDescriptor GetPtr requires one or zero arguments, but ", args.size(), " was passed")); - } - if (template_args.size() > 1) - { - return absl::NotFoundError(absl::StrCat("BufferDescriptor GetPtr requires one or zero template " - "arguments, but ", - template_args.size(), " was passed")); - } - std::string conversion; - if (template_args.size() == 1) - { - const std::string type_name = ToCLDataType(element_type, element_size); - if (type_name != template_args[0]) - { - conversion = absl::StrCat("(", MemoryTypeToCLType(memory_type), " ", template_args[0], "*)&"); - } - } - if (args.empty()) - { - *result = absl::StrCat(conversion, "buffer"); - } - else if (conversion.empty()) - { - *result =
absl::StrCat("(buffer + ", args[0], ")"); - } - else - { - *result = absl::StrCat(conversion, "buffer[", args[0], "]"); - } - return absl::OkStatus(); -} - -absl::Status BufferDescriptor::CreateGPUObject(CLContext *context, GPUObjectPtr *result) const -{ - Buffer gpu_buffer; - RETURN_IF_ERROR(gpu_buffer.CreateFromBufferDescriptor(*this, context)); - *result = absl::make_unique<Buffer>(std::move(gpu_buffer)); - return absl::OkStatus(); -} - -Buffer::Buffer(cl_mem buffer, size_t size_in_bytes) : buffer_(buffer), size_(size_in_bytes) {} - -Buffer::Buffer(Buffer &&buffer) : buffer_(buffer.buffer_), size_(buffer.size_) -{ - buffer.buffer_ = nullptr; - buffer.size_ = 0; -} - -Buffer &Buffer::operator=(Buffer &&buffer) -{ - if (this != &buffer) - { - Release(); - std::swap(size_, buffer.size_); - std::swap(buffer_, buffer.buffer_); - } - return *this; -} - -void Buffer::Release() -{ - if (buffer_) - { - clReleaseMemObject(buffer_); - buffer_ = nullptr; - size_ = 0; - } -} - -absl::Status Buffer::GetGPUResources(const GPUObjectDescriptor *obj_ptr, - GPUResourcesWithValue *resources) const -{ - const auto *buffer_desc = dynamic_cast<const BufferDescriptor *>(obj_ptr); - if (!buffer_desc) - { - return absl::InvalidArgumentError("Expected BufferDescriptor on input."); - } - - resources->buffers.push_back({"buffer", buffer_}); - return absl::OkStatus(); -} - -absl::Status Buffer::CreateFromBufferDescriptor(const BufferDescriptor &desc, CLContext *context) -{ - bool read_only = desc.memory_type == MemoryType::CONSTANT; - uint8_t *data_ptr = desc.data.empty() ? nullptr : const_cast<unsigned char *>(desc.data.data()); - size_ = desc.size; - return CreateCLBuffer(context->context(), desc.size, read_only, data_ptr, &buffer_); -} - -absl::Status CreateReadOnlyBuffer(size_t size_in_bytes, CLContext *context, Buffer *result) -{ - return CreateBuffer(size_in_bytes, true, nullptr, context, result); -} - -absl::Status CreateReadOnlyBuffer(size_t size_in_bytes, const void *data, CLContext *context, - Buffer *result) -{ - return CreateBuffer(size_in_bytes, true, data, context, result); -} - -absl::Status CreateReadWriteBuffer(size_t size_in_bytes, CLContext *context, Buffer *result) -{ - return CreateBuffer(size_in_bytes, false, nullptr, context, result); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/Buffer.h b/runtime/onert/backend/gpu_cl/open_cl/Buffer.h deleted file mode 100644 index 39e97be6d..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Buffer.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_BUFFER_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_BUFFER_H__
-
-#include "absl/strings/str_cat.h"
-#include "absl/types/span.h"
-
-#include "ClCommandQueue.h"
-#include "ClContext.h"
-#include "GpuObject.h"
-#include "OpenclWrapper.h"
-#include "DataType.h"
-#include "Util.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-struct BufferDescriptor : public GPUObjectDescriptor
-{
-  DataType element_type;
-  int element_size;
-  MemoryType memory_type = MemoryType::GLOBAL;
-  std::vector<std::string> attributes;
-
-  // optional
-  int size = 0;
-  std::vector<uint8_t> data;
-
-  BufferDescriptor() = default;
-  BufferDescriptor(const BufferDescriptor &) = default;
-  BufferDescriptor &operator=(const BufferDescriptor &) = default;
-  BufferDescriptor(BufferDescriptor &&desc);
-  BufferDescriptor &operator=(BufferDescriptor &&desc);
-
-  absl::Status PerformSelector(const std::string &selector, const std::vector<std::string> &args,
-                               const std::vector<std::string> &template_args,
-                               std::string *result) const override;
-
-  GPUResources GetGPUResources() const override;
-  absl::Status PerformReadSelector(const std::vector<std::string> &args, std::string *result) const;
-  absl::Status PerformGetPtrSelector(const std::vector<std::string> &args,
-                                     const std::vector<std::string> &template_args,
-                                     std::string *result) const;
-
-  absl::Status CreateGPUObject(CLContext *context, GPUObjectPtr *result) const override;
-  void Release() override;
-};
-
-// Buffer represents linear GPU data storage with an arbitrary data format.
-// Buffer is movable but not copyable.
-class Buffer : public GPUObject
-{
-public:
-  Buffer() {} // just for using Buffer as a class member
-  Buffer(cl_mem buffer, size_t size_in_bytes);
-
-  // Move only
-  Buffer(Buffer &&buffer);
-  Buffer &operator=(Buffer &&buffer);
-  Buffer(const Buffer &) = delete;
-  Buffer &operator=(const Buffer &) = delete;
-
-  virtual ~Buffer() { Release(); }
-
-  // for profiling and memory statistics
-  uint64_t GetMemorySizeInBytes() const { return size_; }
-
-  cl_mem GetMemoryPtr() const { return buffer_; }
-
-  // Writes data to the buffer. Data should point to a region whose size in bytes
-  // exactly matches size_in_bytes (the constructor parameter).
-  template <typename T> absl::Status WriteData(CLCommandQueue *queue, const std::vector<T> *data);
-
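// A minimal usage sketch for the Buffer API above, assuming an already-created
// CLContext `ctx` and a CLCommandQueue `queue` (both names are illustrative,
// not part of the original sources); the queue helpers used here are blocking:
//
//   std::vector<float> host(1024, 1.0f);
//   Buffer buf;
//   RETURN_IF_ERROR(CreateReadWriteBuffer(host.size() * sizeof(float), &ctx, &buf));
//   RETURN_IF_ERROR(buf.WriteData(&queue, &host));     // blocking upload
//   std::vector<float> readback(1024);
//   RETURN_IF_ERROR(buf.ReadData(&queue, &readback));  // blocking download
-  // Reads data from Buffer into CPU memory.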
- template <typename T> absl::Status ReadData(CLCommandQueue *queue, std::vector<T> *result) const; - - absl::Status GetGPUResources(const GPUObjectDescriptor *obj_ptr, - GPUResourcesWithValue *resources) const override; - - absl::Status CreateFromBufferDescriptor(const BufferDescriptor &desc, CLContext *context); - -private: - void Release(); - - cl_mem buffer_ = nullptr; - size_t size_ = 0; -}; - -absl::Status CreateReadOnlyBuffer(size_t size_in_bytes, CLContext *context, Buffer *result); - -absl::Status CreateReadOnlyBuffer(size_t size_in_bytes, const void *data, CLContext *context, - Buffer *result); - -absl::Status CreateReadWriteBuffer(size_t size_in_bytes, CLContext *context, Buffer *result); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_BUFFER_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.cc b/runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.cc deleted file mode 100644 index d147b7b13..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.cc +++ /dev/null @@ -1,359 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ClCommandQueue.h" - -#include <algorithm> -#include <map> -#include <string> -#include <vector> -#include <limits> - -#include "absl/strings/str_cat.h" -#include "ClDevice.h" -#include "ClEvent.h" -#include "Util.h" -#include "Types.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -using namespace std; - -CLCommandQueue::CLCommandQueue(cl_command_queue queue, bool has_ownership) - : queue_(queue), has_ownership_(has_ownership) -{ -} - -CLCommandQueue::CLCommandQueue(CLCommandQueue &&queue) - : queue_(queue.queue_), has_ownership_(queue.has_ownership_) -{ - queue.queue_ = nullptr; -} - -CLCommandQueue &CLCommandQueue::operator=(CLCommandQueue &&queue) -{ - if (this != &queue) - { - Release(); - std::swap(queue_, queue.queue_); - has_ownership_ = queue.has_ownership_; - } - return *this; -} - -CLCommandQueue::~CLCommandQueue() { Release(); } - -void CLCommandQueue::Release() -{ - if (has_ownership_ && queue_) - { - clReleaseCommandQueue(queue_); - queue_ = nullptr; - } -} - -absl::Status CLCommandQueue::Dispatch(const CLKernel &kernel, const int3 &work_groups_count, - const int3 &work_group_size, CLEvent *event) -{ - std::vector<size_t> local(3); - std::vector<size_t> global(3); - for (int i = 0; i < 3; ++i) - { - local[i] = work_group_size[i]; - global[i] = work_groups_count[i] * work_group_size[i]; - } - cl_event resulting_event; - const int error_code = - clEnqueueNDRangeKernel(queue_, kernel.kernel(), 3, nullptr, global.data(), local.data(), 0, - nullptr, event ? 
&resulting_event : nullptr); - if (event) - { - *event = CLEvent(resulting_event); - } - if (error_code != CL_SUCCESS) - { - return absl::UnknownError( - absl::StrCat("Failed to clEnqueueNDRangeKernel - ", CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); -} - -absl::Status CLCommandQueue::Dispatch(const CLKernel &kernel, const int3 &work_groups_count, - const int3 &work_group_size) -{ - return Dispatch(kernel, work_groups_count, work_group_size, nullptr); -} - -absl::Status CLCommandQueue::EnqueueEvent(CLEvent *event) -{ - cl_event resulting_event; - const int error_code = clEnqueueMarker(queue_, &resulting_event); - *event = CLEvent(resulting_event); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError( - absl::StrCat("Failed to clEnqueueMarker - ", CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); -} - -absl::Status CLCommandQueue::EnqueueWriteImage(cl_mem memory, int3 region, const void *data) -{ - const size_t origin[] = {0, 0, 0}; - const size_t r[] = {static_cast<size_t>(region.x), static_cast<size_t>(region.y), - static_cast<size_t>(region.z)}; - auto error_code = - clEnqueueWriteImage(queue_, memory, CL_TRUE, origin, r, 0, 0, data, 0, nullptr, nullptr); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to upload data to GPU (clEnqueueWriteImage) - ", - CLErrorCodeToString(error_code))); - } - - return absl::OkStatus(); -} - -absl::Status CLCommandQueue::EnqueueReadImage(cl_mem memory, int3 region, void *data) -{ - const size_t origin[] = {0, 0, 0}; - const size_t r[] = {static_cast<size_t>(region.x), static_cast<size_t>(region.y), - static_cast<size_t>(region.z)}; - auto error_code = - clEnqueueReadImage(queue_, memory, CL_TRUE, origin, r, 0, 0, data, 0, nullptr, nullptr); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to read data from GPU (clEnqueueReadImage) - ", - CLErrorCodeToString(error_code))); - } - - return absl::OkStatus(); -} - -absl::Status CLCommandQueue::EnqueueWriteBuffer(cl_mem memory, size_t size_in_bytes, - const void *data) -{ - auto error_code = - clEnqueueWriteBuffer(queue_, memory, CL_TRUE, 0, size_in_bytes, data, 0, nullptr, nullptr); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to upload data to GPU (clEnqueueWriteBuffer) - ", - CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); -} - -absl::Status CLCommandQueue::EnqueueReadBuffer(cl_mem memory, size_t size_in_bytes, void *data) -{ - auto error_code = - clEnqueueReadBuffer(queue_, memory, CL_TRUE, 0, size_in_bytes, data, 0, nullptr, nullptr); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to read data from GPU (clEnqueueReadBuffer) - ", - CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); -} - -absl::Status CLCommandQueue::WaitForCompletion() -{ - auto error_code = clFinish(queue_); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError( - absl::StrCat("Failed to clFinish - ", CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); -} - -ProfilingCommandQueue::ProfilingCommandQueue(cl_command_queue queue) : CLCommandQueue(queue, true) -{ - events_.reserve(128); -} - -ProfilingCommandQueue::ProfilingCommandQueue(ProfilingCommandQueue &&queue) - : CLCommandQueue(std::move(queue)), events_(std::move(queue.events_)), - current_label_(std::move(queue.current_label_)) -{ -} - -ProfilingCommandQueue &ProfilingCommandQueue::operator=(ProfilingCommandQueue &&queue) -{ - if 
(this != &queue)
-  {
-    events_ = std::move(queue.events_);
-    current_label_ = std::move(queue.current_label_);
-    CLCommandQueue::operator=(std::move(queue));
-  }
-  return *this;
-}
-
-void ProfilingCommandQueue::SetEventsLabel(const std::string &name) { current_label_ = name; }
-
-void ProfilingCommandQueue::ResetMeasurements() { events_.clear(); }
-
-absl::Status ProfilingCommandQueue::Dispatch(const CLKernel &kernel, const int3 &work_groups_count,
-                                             const int3 &work_group_size)
-{
-  events_.push_back(CLEvent());
-  RETURN_IF_ERROR(CLCommandQueue::Dispatch(kernel, work_groups_count, work_group_size,
-                                           &events_[events_.size() - 1]));
-  events_.back().SetName(current_label_);
-  return absl::OkStatus();
-}
-
-absl::Status
-ProfilingCommandQueue::GetBestWorkGroupIndex(const CLKernel &kernel, const DeviceInfo &device_info,
-                                             const std::vector<int3> &work_groups_count,
-                                             const std::vector<int3> &work_group_sizes, int *index)
-{
-  // Some Adreno 3xx can report wrong numbers for some events
-  const bool possible_bug_with_events = device_info.IsAdreno3xx();
-  events_.resize(work_group_sizes.size());
-  for (size_t i = 0; i < work_group_sizes.size(); ++i)
-  {
-    RETURN_IF_ERROR(
-      CLCommandQueue::Dispatch(kernel, work_groups_count[i], work_group_sizes[i], &events_[i]));
-
-    // Slows down the memory leak on Mali for some kernels
-    if (device_info.IsMali() && i % 8 == 7)
-    {
-      events_[i - 7].Wait();
-    }
-    if (possible_bug_with_events)
-    {
-      // Try to increase the probability of a correct result.
-      RETURN_IF_ERROR(WaitForCompletion());
-    }
-  }
-
-  RETURN_IF_ERROR(WaitForCompletion());
-
-  // Release the memory of some kernel pools on Mali.
-  if (device_info.IsMali())
-  {
-    RETURN_IF_ERROR(kernel.ReInit());
-  }
-
-  int minimum_index = 0;
-  double minimum_time = std::numeric_limits<double>::max();
-  if (possible_bug_with_events)
-  { // try to filter out suspicious results
-    double average_time = 0.0;
-    int average_samples_count = 0;
-    for (size_t i = 0; i < work_group_sizes.size(); ++i)
-    {
-      if (events_[i].GetEventTimeMs() < 100 * 1000)
-      { // 100 sec
-        average_time += events_[i].GetEventTimeMs();
-        average_samples_count++;
-      }
-    }
-    if (average_samples_count == 0)
-    {
-      throw std::runtime_error("Average time cannot be computed: no valid events");
-    }
-    else
-    {
-      average_time /= average_samples_count;
-    }
-
-    for (size_t i = 0; i < work_group_sizes.size(); ++i)
-    {
-      double time = events_[i].GetEventTimeMs();
-      if (time < minimum_time && time >= 0.1 * average_time)
-      {
-        minimum_index = i;
-        minimum_time = time;
-      }
-    }
-  }
-  else
-  {
-    for (size_t i = 0; i < work_group_sizes.size(); ++i)
-    {
-      double time = events_[i].GetEventTimeMs();
-      if (time < minimum_time)
-      {
-        minimum_index = i;
-        minimum_time = time;
-      }
-    }
-  }
-
-  *index = minimum_index;
-
-  return absl::OkStatus();
-}
-
-absl::Status CreateCLCommandQueue(const CLDevice &device, const CLContext &context,
-                                  CLCommandQueue *result)
-{
-  int error_code;
-  cl_command_queue queue = clCreateCommandQueue(context.context(), device.id(), 0, &error_code);
-  if (!queue)
-  {
-    return absl::UnknownError(
-      absl::StrCat("Failed to create a command queue - ", CLErrorCodeToString(error_code)));
-  }
-  *result = CLCommandQueue(queue, true);
-  return absl::OkStatus();
-}
-
-double ProfilingCommandQueue::GetQueueExecutionTimeMs() const
-{
-  const uint64_t start = events_.front().GetStartedTimeNs();
-  const uint64_t end = events_.back().GetFinishedTimeNs();
-  const uint64_t time_ns = (end - start);
-
-  return static_cast<double>(time_ns) / 1000000.0;
-}
-
-double
ProfilingCommandQueue::GetSumOfEventsTimeMs() const
-{
-  double sum = 0.0;
-  for (uint32_t i = 0; i < events_.size(); ++i)
-  {
-    sum += events_[i].GetEventTimeMs();
-  }
-  return sum;
-}
-
-absl::Status CreateProfilingCommandQueue(const CLDevice &device, const CLContext &context,
-                                         ProfilingCommandQueue *result)
-{
-  int error_code;
-  cl_command_queue queue =
-    clCreateCommandQueue(context.context(), device.id(), CL_QUEUE_PROFILING_ENABLE, &error_code);
-  if (!queue)
-  {
-    return absl::UnknownError(
-      absl::StrCat("Failed to create a command queue - ", CLErrorCodeToString(error_code)));
-  }
-
-  *result = ProfilingCommandQueue(queue);
-  return absl::OkStatus();
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.h b/runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.h
deleted file mode 100644
index 81f93fd23..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.h
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_COMMAND_QUEUE_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_COMMAND_QUEUE_H__
-
-#include <cstdint>
-#include <string>
-#include <vector>
-
-#include "absl/time/time.h"
-#include "ClContext.h"
-#include "ClDevice.h"
-#include "ClEvent.h"
-#include "ClKernel.h"
-#include "OpenclWrapper.h"
-#include "Types.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-struct ProfilingInfo
-{
-  struct DispatchInfo
-  {
-    std::string label;
-    absl::Duration duration;
-  };
-
-  std::vector<DispatchInfo> dispatches;
-
-  absl::Duration GetTotalTime() const;
-
-  // Returns a report (a string of lines delimited by \n).
-  // This method uses GPU counters and measures GPU time only.
-  // The report has the following structure:
-  // Per kernel timing (K kernels):
-  // conv2d 3.2ms
-  // ...
-  // --------------------
-  // Accumulated time per operation type:
-  // conv2d - 14.5ms
-  // ....
-  // --------------------
-  // Ideal total time: 23.4ms // Total time for all kernels
-  std::string GetDetailedReport() const;
-};
-
-// A wrapper around an OpenCL command queue
-class CLCommandQueue
-{
-public:
-  CLCommandQueue() {}
-  CLCommandQueue(cl_command_queue queue, bool has_ownership);
-
-  // Move only
-  CLCommandQueue(CLCommandQueue &&queue);
-  CLCommandQueue &operator=(CLCommandQueue &&queue);
-  CLCommandQueue(const CLCommandQueue &) = delete;
-  CLCommandQueue &operator=(const CLCommandQueue &) = delete;
-
-  virtual ~CLCommandQueue();
-
-  cl_command_queue queue() const { return queue_; }
-
-  virtual absl::Status Dispatch(const CLKernel &kernel, const int3 &work_groups_count,
-                                const int3 &work_group_size);
-
-  absl::Status Dispatch(const CLKernel &kernel, const int3 &work_groups_count,
-                        const int3 &work_group_size, CLEvent *event);
-
-  absl::Status EnqueueEvent(CLEvent *event);
-
-  absl::Status EnqueueWriteImage(cl_mem memory, int3 region, const void *data);
-  absl::Status EnqueueReadImage(cl_mem memory, int3 region, void *data);
-
-  absl::Status EnqueueWriteBuffer(cl_mem memory, size_t size_in_bytes, const void *data);
-  absl::Status EnqueueReadBuffer(cl_mem memory, size_t size_in_bytes, void *data);
-
-  absl::Status WaitForCompletion();
-
-protected:
-  void Release();
-
-  cl_command_queue queue_ = nullptr;
-  bool has_ownership_ = false;
-};
-
-class ProfilingCommandQueue : public CLCommandQueue
-{
-public:
-  ProfilingCommandQueue() {}
-  explicit ProfilingCommandQueue(cl_command_queue queue);
-
-  // Move only
-  ProfilingCommandQueue(ProfilingCommandQueue &&queue);
-  ProfilingCommandQueue &operator=(ProfilingCommandQueue &&queue);
-  ProfilingCommandQueue(const ProfilingCommandQueue &) = delete;
-  ProfilingCommandQueue &operator=(const ProfilingCommandQueue &) = delete;
-
-  absl::Status Dispatch(const CLKernel &kernel, const int3 &work_groups_count,
-                        const int3 &work_group_size) override;
-
-  // Writes the index of the fastest work group among work_group_sizes
-  absl::Status GetBestWorkGroupIndex(const CLKernel &kernel, const DeviceInfo &device_info,
-                                     const std::vector<int3> &work_groups_count,
-                                     const std::vector<int3> &work_group_sizes, int *index);
-
-  // Call ResetMeasurements() to start a new series of measurements
-  void ResetMeasurements();
-
-  double GetQueueExecutionTimeMs() const;
-
-  // Unlike GetQueueExecutionTimeMs, this number does not include the time spent
-  // between kernels (kernel launches or preparation) on the GPU. It is usually
-  // 5-10% lower than GetQueueExecutionTimeMs, because that 5-10% is spent on
-  // something else (such as kernel launches or preparation).
-  double GetSumOfEventsTimeMs() const;
-
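// A typical profiling flow with this class, sketched under the assumption that
// `device`, `context`, `kernel`, and the dispatch sizes are already set up
// (names are illustrative):
//
//   ProfilingCommandQueue queue;
//   RETURN_IF_ERROR(CreateProfilingCommandQueue(device, context, &queue));
//   queue.SetEventsLabel("conv2d");
//   RETURN_IF_ERROR(queue.Dispatch(kernel, work_groups_count, work_group_size));
//   RETURN_IF_ERROR(queue.WaitForCompletion());
//   double total_ms = queue.GetQueueExecutionTimeMs(); // includes gaps between kernels
//   double kernel_ms = queue.GetSumOfEventsTimeMs();   // kernel execution time only
-  // This label will be used for all subsequent dispatches.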
- void SetEventsLabel(const std::string &name); - -private: - std::vector<CLEvent> events_; - std::string current_label_; -}; - -absl::Status CreateCLCommandQueue(const CLDevice &device, const CLContext &context, - CLCommandQueue *result); - -absl::Status CreateProfilingCommandQueue(const CLDevice &device, const CLContext &context, - ProfilingCommandQueue *result); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_COMMAND_QUEUE_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClContext.cc b/runtime/onert/backend/gpu_cl/open_cl/ClContext.cc deleted file mode 100644 index 3289ff914..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClContext.cc +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ClContext.h" - -#include "absl/strings/str_cat.h" -#include "ClImageFormat.h" -#include "Util.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -std::vector<cl_image_format> GetSupportedImage2DFormats(cl_context context, cl_mem_flags flags) -{ - cl_uint num_image_formats; - cl_int error = clGetSupportedImageFormats(context, flags, CL_MEM_OBJECT_IMAGE2D, 0, nullptr, - &num_image_formats); - if (error != CL_SUCCESS) - { - return {}; - } - - std::vector<cl_image_format> result(num_image_formats); - error = clGetSupportedImageFormats(context, flags, CL_MEM_OBJECT_IMAGE2D, num_image_formats, - &result[0], nullptr); - if (error != CL_SUCCESS) - { - return {}; - } - return result; -} - -bool IsEqualToImageFormat(cl_image_format image_format, DataType data_type, int num_channels) -{ - return image_format.image_channel_data_type == ToImageChannelType(data_type) && - image_format.image_channel_order == ToChannelOrder(num_channels); -} - -void AddSupportedImageFormats(cl_context context, DeviceInfo *info) -{ - auto supported_formats = GetSupportedImage2DFormats(context, CL_MEM_READ_WRITE); - for (auto format : supported_formats) - { - info->supports_r_f16_tex2d = - info->supports_r_f16_tex2d || IsEqualToImageFormat(format, DataType::FLOAT16, 1); - info->supports_rg_f16_tex2d = - info->supports_rg_f16_tex2d || IsEqualToImageFormat(format, DataType::FLOAT16, 2); - info->supports_rgb_f16_tex2d = - info->supports_rgb_f16_tex2d || IsEqualToImageFormat(format, DataType::FLOAT16, 3); - info->supports_rgba_f16_tex2d = - info->supports_rgba_f16_tex2d || IsEqualToImageFormat(format, DataType::FLOAT16, 4); - info->supports_r_f32_tex2d = - info->supports_r_f32_tex2d || IsEqualToImageFormat(format, DataType::FLOAT32, 1); - info->supports_rg_f32_tex2d = - info->supports_rg_f32_tex2d || IsEqualToImageFormat(format, DataType::FLOAT32, 2); - info->supports_rgb_f32_tex2d = - info->supports_rgb_f32_tex2d || IsEqualToImageFormat(format, DataType::FLOAT32, 3); - info->supports_rgba_f32_tex2d = - 
info->supports_rgba_f32_tex2d || IsEqualToImageFormat(format, DataType::FLOAT32, 4); - } -} - -absl::Status CreateCLContext(const CLDevice &device, cl_context_properties *properties, - CLContext *result) -{ - int error_code; - cl_device_id device_id = device.id(); - cl_context context = clCreateContext(properties, 1, &device_id, nullptr, nullptr, &error_code); - if (!context) - { - return absl::UnknownError( - absl::StrCat("Failed to create a compute context - ", CLErrorCodeToString(error_code))); - } - AddSupportedImageFormats(context, &device.info_); - - *result = CLContext(context, true); - return absl::OkStatus(); -} - -} // namespace - -CLContext::CLContext(cl_context context, bool has_ownership) - : context_(context), has_ownership_(has_ownership) -{ -} - -CLContext::CLContext(CLContext &&context) - : context_(context.context_), has_ownership_(context.has_ownership_) -{ - context.context_ = nullptr; -} - -CLContext &CLContext::operator=(CLContext &&context) -{ - if (this != &context) - { - Release(); - std::swap(context_, context.context_); - has_ownership_ = context.has_ownership_; - } - return *this; -} - -CLContext::~CLContext() { Release(); } - -void CLContext::Release() -{ - if (has_ownership_ && context_) - { - clReleaseContext(context_); - context_ = nullptr; - } -} - -bool CLContext::IsFloatTexture2DSupported(int num_channels, DataType data_type, - cl_mem_flags flags) const -{ - auto supported_formats = GetSupportedImage2DFormats(context_, flags); - for (auto format : supported_formats) - { - if (format.image_channel_data_type == ToImageChannelType(data_type) && - format.image_channel_order == ToChannelOrder(num_channels)) - { - return true; - } - } - - return false; -} - -absl::Status CreateCLContext(const CLDevice &device, CLContext *result) -{ - return CreateCLContext(device, nullptr, result); -} - -absl::Status CreateCLGLContext(const CLDevice &device, cl_context_properties egl_context, - cl_context_properties egl_display, CLContext *result) -{ - if (!device.SupportsExtension("cl_khr_gl_sharing")) - { - return absl::UnavailableError("Device doesn't support CL-GL sharing."); - } - cl_context_properties platform = reinterpret_cast<cl_context_properties>(device.platform()); - cl_context_properties props[] = {CL_GL_CONTEXT_KHR, - egl_context, - CL_EGL_DISPLAY_KHR, - egl_display, - CL_CONTEXT_PLATFORM, - platform, - 0}; - return CreateCLContext(device, props, result); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClContext.h b/runtime/onert/backend/gpu_cl/open_cl/ClContext.h deleted file mode 100644 index cf1d0d2d2..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClContext.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_CONTEXT_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_CONTEXT_H__ - -#include "ClDevice.h" -#include "OpenclWrapper.h" -#include "DataType.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -// A RAII wrapper around opencl context -class CLContext -{ -public: - CLContext() {} - CLContext(cl_context context, bool has_ownership); - - // Move only - CLContext(CLContext &&context); - CLContext &operator=(CLContext &&context); - CLContext(const CLContext &) = delete; - CLContext &operator=(const CLContext &) = delete; - - ~CLContext(); - - cl_context context() const { return context_; } - - bool IsFloatTexture2DSupported(int num_channels, DataType data_type, - cl_mem_flags flags = CL_MEM_READ_WRITE) const; - -private: - void Release(); - - cl_context context_ = nullptr; - bool has_ownership_ = false; -}; - -absl::Status CreateCLContext(const CLDevice &device, CLContext *result); -absl::Status CreateCLGLContext(const CLDevice &device, cl_context_properties egl_context, - cl_context_properties egl_display, CLContext *result); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_CONTEXT_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClDevice.cc b/runtime/onert/backend/gpu_cl/open_cl/ClDevice.cc deleted file mode 100644 index 8dede139c..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClDevice.cc +++ /dev/null @@ -1,448 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "ClDevice.h" - -#include <algorithm> -#include <string> -#include <vector> - -#include "Util.h" -#include "Status.h" - -#include "absl/strings/numbers.h" -#include "absl/strings/str_split.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -template <> std::string GetDeviceInfo<std::string>(cl_device_id id, cl_device_info info) -{ - size_t size; - cl_int error = clGetDeviceInfo(id, info, 0, nullptr, &size); - if (error != CL_SUCCESS) - { - return ""; - } - - std::string result(size - 1, 0); - error = clGetDeviceInfo(id, info, size, &result[0], nullptr); - if (error != CL_SUCCESS) - { - return ""; - } - return result; -} - -namespace -{ -template <typename T> T GetPlatformInfo(cl_platform_id id, cl_platform_info info) -{ - T result; - cl_int error = clGetPlatformInfo(id, info, sizeof(T), &result, nullptr); - if (error != CL_SUCCESS) - { - return -1; - } - return result; -} - -std::string GetPlatformInfo(cl_platform_id id, cl_platform_info info) -{ - size_t size; - cl_int error = clGetPlatformInfo(id, info, 0, nullptr, &size); - if (error != CL_SUCCESS) - { - return ""; - } - - std::string result(size - 1, 0); - error = clGetPlatformInfo(id, info, size, &result[0], nullptr); - if (error != CL_SUCCESS) - { - return ""; - } - return result; -} - -void GetDeviceWorkDimsSizes(cl_device_id id, int3 *result) -{ - int dims_count = GetDeviceInfo<cl_uint>(id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS); - if (dims_count < 3) - { - return; - } - std::vector<size_t> limits(dims_count); - cl_int error = clGetDeviceInfo(id, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * dims_count, - limits.data(), nullptr); - if (error != CL_SUCCESS) - { - return; - } - // dims_count must be at least 3 according to spec - result->x = limits[0]; - result->y = limits[1]; - result->z = limits[2]; -} - -OpenCLVersion ParseCLVersion(const std::string &version) -{ - const auto first_dot_pos = version.find_first_of('.'); - if (first_dot_pos == std::string::npos) - { - return OpenCLVersion::CL_1_0; - } - const int major = version[first_dot_pos - 1] - '0'; - const int minor = version[first_dot_pos + 1] - '0'; - - if (major == 1) - { - if (minor == 2) - { - return OpenCLVersion::CL_1_2; - } - else if (minor == 1) - { - return OpenCLVersion::CL_1_1; - } - else - { - return OpenCLVersion::CL_1_0; - } - } - else if (major == 2) - { - if (minor == 2) - { - return OpenCLVersion::CL_2_2; - } - else if (minor == 1) - { - return OpenCLVersion::CL_2_1; - } - else - { - return OpenCLVersion::CL_2_0; - } - } - else if (major == 3) - { - return OpenCLVersion::CL_3_0; - } - else - { - return OpenCLVersion::CL_1_0; - } -} - -Vendor ParseVendor(const std::string &device_name, const std::string &vendor_name) -{ - std::string d_name = device_name; - std::string v_name = vendor_name; - std::transform(d_name.begin(), d_name.end(), d_name.begin(), ::tolower); - std::transform(v_name.begin(), v_name.end(), v_name.begin(), ::tolower); - if (d_name.find("qualcomm") != std::string::npos || v_name.find("qualcomm") != std::string::npos) - { - return Vendor::kQualcomm; - } - else if (d_name.find("mali") != std::string::npos || v_name.find("mali") != std::string::npos) - { - return Vendor::kMali; - } - else if (d_name.find("power") != std::string::npos || v_name.find("power") != std::string::npos) - { - return Vendor::kPowerVR; - } - else if (d_name.find("nvidia") != std::string::npos || v_name.find("nvidia") != std::string::npos) - { - return Vendor::kNvidia; - } - else if (d_name.find("advanced micro devices") != 
std::string::npos ||
-           v_name.find("advanced micro devices") != std::string::npos)
-  {
-    return Vendor::kAMD;
-  }
-  else if (d_name.find("intel") != std::string::npos || v_name.find("intel") != std::string::npos)
-  {
-    return Vendor::kIntel;
-  }
-  else
-  {
-    return Vendor::kUnknown;
-  }
-}
-
-// Checks that gpu_version belongs to the range min_version-max_version;
-// min_version is included and max_version is excluded.
-bool IsGPUVersionInRange(int gpu_version, int min_version, int max_version)
-{
-  return gpu_version >= min_version && gpu_version < max_version;
-}
-} // namespace
-
-DeviceInfo DeviceInfoFromDeviceID(cl_device_id id)
-{
-  DeviceInfo info;
-  const auto device_name = GetDeviceInfo<std::string>(id, CL_DEVICE_NAME);
-  const auto vendor_name = GetDeviceInfo<std::string>(id, CL_DEVICE_VENDOR);
-  const auto opencl_c_version = GetDeviceInfo<std::string>(id, CL_DEVICE_OPENCL_C_VERSION);
-  info.vendor = ParseVendor(device_name, vendor_name);
-  if (info.vendor == Vendor::kQualcomm)
-  {
-    info.adreno_info = AdrenoInfo(opencl_c_version);
-  }
-  else if (info.vendor == Vendor::kMali)
-  {
-    info.mali_info = MaliInfo(device_name);
-  }
-  info.cl_version = ParseCLVersion(opencl_c_version);
-  info.extensions = absl::StrSplit(GetDeviceInfo<std::string>(id, CL_DEVICE_EXTENSIONS), ' ');
-
-  info.supports_fp16 = false;
-  info.supports_image3d_writes = false;
-  for (const auto &ext : info.extensions)
-  {
-    if (ext == "cl_khr_fp16")
-    {
-      info.supports_fp16 = true;
-    }
-    if (ext == "cl_khr_3d_image_writes")
-    {
-      info.supports_image3d_writes = true;
-    }
-  }
-
-  cl_device_fp_config f32_config =
-    GetDeviceInfo<cl_device_fp_config>(id, CL_DEVICE_SINGLE_FP_CONFIG);
-  info.supports_fp32_rtn = f32_config & CL_FP_ROUND_TO_NEAREST;
-
-  if (info.supports_fp16)
-  {
-    cl_device_fp_config f16_config;
-    auto status = GetDeviceInfo<cl_device_fp_config>(id, CL_DEVICE_HALF_FP_CONFIG, &f16_config);
-    // AMD supports cl_khr_fp16 but CL_DEVICE_HALF_FP_CONFIG is empty.
-    if (status.ok() && info.vendor != Vendor::kAMD)
-    {
-      info.supports_fp16_rtn = f16_config & CL_FP_ROUND_TO_NEAREST;
-    }
-    else
-    { // happens on PowerVR
-      f16_config = f32_config;
-      info.supports_fp16_rtn = info.supports_fp32_rtn;
-    }
-  }
-  else
-  {
-    info.supports_fp16_rtn = false;
-  }
-
-  if (info.vendor == Vendor::kPowerVR && !info.supports_fp16)
-  {
-    // PowerVR doesn't fully support fp16 and so doesn't list this extension.
-    // But it can use fp16 in MADs and as buffer/texture types, so we use it.
- info.supports_fp16 = true; - info.supports_fp16_rtn = info.supports_fp32_rtn; - } - - if (!info.supports_image3d_writes && - ((info.vendor == Vendor::kQualcomm && - IsGPUVersionInRange(info.adreno_info.gpu_version, 400, 500)) || - info.vendor == Vendor::kNvidia)) - { - // in local tests Adreno 430 can write in image 3d, at least on small sizes, - // but it doesn't have cl_khr_3d_image_writes in list of available - // extensions - // The same for NVidia - info.supports_image3d_writes = true; - } - info.compute_units_count = GetDeviceInfo<cl_uint>(id, CL_DEVICE_MAX_COMPUTE_UNITS); - info.image2d_max_width = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_WIDTH); - info.image2d_max_height = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT); - info.buffer_max_size = GetDeviceInfo<cl_ulong>(id, CL_DEVICE_MAX_MEM_ALLOC_SIZE); - if (info.cl_version >= OpenCLVersion::CL_1_2) - { - info.image_buffer_max_size = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE); - info.image_array_max_layers = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE); - } - info.image3d_max_width = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_WIDTH); - info.image3d_max_height = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT); - info.image3d_max_depth = GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_DEPTH); - int3 max_work_group_sizes; - GetDeviceWorkDimsSizes(id, &max_work_group_sizes); - info.max_work_group_size_x = max_work_group_sizes.x; - info.max_work_group_size_y = max_work_group_sizes.y; - info.max_work_group_size_z = max_work_group_sizes.z; - - if (info.IsIntel()) - { - if (info.SupportsExtension("cl_intel_required_subgroup_size")) - { - size_t sub_groups_count; - cl_int status = clGetDeviceInfo(id, 0x4108 /*CL_DEVICE_SUB_GROUP_SIZES_INTEL*/, 0, nullptr, - &sub_groups_count); - if (status == CL_SUCCESS) - { - std::vector<size_t> sub_group_sizes(sub_groups_count); - status = - clGetDeviceInfo(id, 0x4108 /*CL_DEVICE_SUB_GROUP_SIZES_INTEL*/, - sizeof(size_t) * sub_groups_count, sub_group_sizes.data(), nullptr); - if (status == CL_SUCCESS) - { - for (size_t i = 0; i < sub_groups_count; ++i) - { - info.supported_subgroup_sizes.push_back(sub_group_sizes[i]); - } - } - } - } - } - return info; -} - -CLDevice::CLDevice(cl_device_id id, cl_platform_id platform_id) - : info_(DeviceInfoFromDeviceID(id)), id_(id), platform_id_(platform_id) -{ -} - -CLDevice::CLDevice(const CLDevice &device) - : info_(device.info_), id_(device.id_), platform_id_(device.platform_id_) -{ -} - -CLDevice &CLDevice::operator=(const CLDevice &device) -{ - if (this != &device) - { - info_ = device.info_; - id_ = device.id_; - platform_id_ = device.platform_id_; - } - return *this; -} - -CLDevice::CLDevice(CLDevice &&device) - : info_(std::move(device.info_)), id_(device.id_), platform_id_(device.platform_id_) -{ - device.id_ = nullptr; - device.platform_id_ = nullptr; -} - -CLDevice &CLDevice::operator=(CLDevice &&device) -{ - if (this != &device) - { - id_ = nullptr; - platform_id_ = nullptr; - info_ = std::move(device.info_); - std::swap(id_, device.id_); - std::swap(platform_id_, device.platform_id_); - } - return *this; -} - -bool CLDevice::SupportsFP16() const { return info_.supports_fp16; } - -bool CLDevice::SupportsExtension(const std::string &extension) const -{ - return info_.SupportsExtension(extension); -} - -bool CLDevice::SupportsTextureArray() const { return info_.SupportsTextureArray(); } - -bool CLDevice::SupportsImageBuffer() const { return info_.SupportsImageBuffer(); } - -bool 
CLDevice::SupportsImage3D() const { return info_.SupportsImage3D(); } - -bool CLDevice::SupportsFP32RTN() const { return info_.supports_fp32_rtn; } - -bool CLDevice::SupportsFP16RTN() const { return info_.supports_fp16_rtn; } - -std::string CLDevice::GetPlatformVersion() const -{ - return GetPlatformInfo(platform_id_, CL_PLATFORM_VERSION); -} - -bool CLDevice::IsCL20OrHigher() const { return info_.IsCL20OrHigher(); } - -bool CLDevice::SupportsSubGroupWithSize(int sub_group_size) const -{ - return info_.SupportsSubGroupWithSize(sub_group_size); -} - -bool CLDevice::IsAdreno() const { return info_.IsAdreno(); } - -bool CLDevice::IsAdreno3xx() const { return info_.IsAdreno3xx(); } - -bool CLDevice::IsAdreno4xx() const { return info_.IsAdreno4xx(); } - -bool CLDevice::IsAdreno5xx() const { return info_.IsAdreno5xx(); } - -bool CLDevice::IsAdreno6xx() const { return info_.IsAdreno6xx(); } - -bool CLDevice::IsAdreno6xxOrHigher() const { return info_.IsAdreno6xxOrHigher(); } - -bool CLDevice::IsPowerVR() const { return info_.IsPowerVR(); } - -bool CLDevice::IsNvidia() const { return info_.IsNvidia(); } - -bool CLDevice::IsMali() const { return info_.IsMali(); } - -bool CLDevice::IsAMD() const { return info_.IsAMD(); } - -bool CLDevice::IsIntel() const { return info_.IsIntel(); } - -bool CLDevice::SupportsOneLayerTextureArray() const { return info_.SupportsOneLayerTextureArray(); } - -void CLDevice::DisableOneLayerTextureArray() -{ - info_.adreno_info.support_one_layer_texture_array = false; -} - -absl::Status CreateDefaultGPUDevice(CLDevice *result) -{ - cl_uint num_platforms; - clGetPlatformIDs(0, nullptr, &num_platforms); - if (num_platforms == 0) - { - return absl::UnknownError("No supported OpenCL platform."); - } - std::vector<cl_platform_id> platforms(num_platforms); - clGetPlatformIDs(num_platforms, platforms.data(), nullptr); - - cl_platform_id platform_id = platforms[0]; - cl_uint num_devices; - clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 0, nullptr, &num_devices); - if (num_devices == 0) - { - return absl::UnknownError("No GPU on current platform."); - } - - std::vector<cl_device_id> devices(num_devices); - clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, num_devices, devices.data(), nullptr); - - *result = CLDevice(devices[0], platform_id); - return absl::OkStatus(); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClDevice.h b/runtime/onert/backend/gpu_cl/open_cl/ClDevice.h deleted file mode 100644 index 6e740fe97..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClDevice.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_DEVICE_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_DEVICE_H__ - -#include <string> -#include <vector> - -#include "DeviceInfo.h" -#include "OpenclWrapper.h" -#include "Util.h" -#include "Types.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -// A wrapper around opencl device id -class CLDevice -{ -public: - CLDevice() = default; - CLDevice(cl_device_id id, cl_platform_id platform_id); - - CLDevice(CLDevice &&device); - CLDevice &operator=(CLDevice &&device); - CLDevice(const CLDevice &); - CLDevice &operator=(const CLDevice &); - - ~CLDevice() {} - - cl_device_id id() const { return id_; } - cl_platform_id platform() const { return platform_id_; } - std::string GetPlatformVersion() const; - - Vendor vendor() const { return info_.vendor; } - OpenCLVersion cl_version() const { return info_.cl_version; } - bool SupportsFP16() const; - bool SupportsTextureArray() const; - bool SupportsImageBuffer() const; - bool SupportsImage3D() const; - bool SupportsExtension(const std::string &extension) const; - bool SupportsFP32RTN() const; - bool SupportsFP16RTN() const; - bool IsCL20OrHigher() const; - bool SupportsSubGroupWithSize(int sub_group_size) const; - bool IsAdreno() const; - bool IsAdreno3xx() const; - bool IsAdreno4xx() const; - bool IsAdreno5xx() const; - bool IsAdreno6xx() const; - bool IsAdreno6xxOrHigher() const; - bool IsPowerVR() const; - bool IsNvidia() const; - bool IsMali() const; - bool IsAMD() const; - bool IsIntel() const; - - // To track bug on some Adreno. b/131099086 - bool SupportsOneLayerTextureArray() const; - void DisableOneLayerTextureArray(); - - const DeviceInfo &GetInfo() const { return info_; } - // We update device info during context creation, so as supported texture - // formats can be requested from context only. - mutable DeviceInfo info_; - -private: - cl_device_id id_ = nullptr; - cl_platform_id platform_id_ = nullptr; -}; - -absl::Status CreateDefaultGPUDevice(CLDevice *result); - -template <typename T> T GetDeviceInfo(cl_device_id id, cl_device_info info) -{ - T result; - cl_int error = clGetDeviceInfo(id, info, sizeof(T), &result, nullptr); - if (error != CL_SUCCESS) - { - return -1; - } - return result; -} - -template <typename T> absl::Status GetDeviceInfo(cl_device_id id, cl_device_info info, T *result) -{ - cl_int error = clGetDeviceInfo(id, info, sizeof(T), result, nullptr); - if (error != CL_SUCCESS) - { - return absl::InvalidArgumentError(CLErrorCodeToString(error)); - } - return absl::OkStatus(); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_DEVICE_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClErrors.h b/runtime/onert/backend/gpu_cl/open_cl/ClErrors.h deleted file mode 100644 index 48cd2fb00..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClErrors.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_ERRORS_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_ERRORS_H__ - -#include <string> - -#include "Util.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -// @return if error_code is success, then return OK status. Otherwise translates -// error code into a message. -inline absl::Status GetOpenCLError(cl_int error_code) -{ - if (error_code == CL_SUCCESS) - { - return absl::OkStatus(); - } - return absl::InternalError("OpenCL error: " + CLErrorCodeToString(error_code)); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_ERRORS_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClEvent.cc b/runtime/onert/backend/gpu_cl/open_cl/ClEvent.cc deleted file mode 100644 index beb64a9a8..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClEvent.cc +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ClEvent.h" - -#include "OpenclWrapper.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -CLEvent::CLEvent(cl_event event) : event_(event) {} - -CLEvent::CLEvent(CLEvent &&event) : event_(event.event_), name_(std::move(event.name_)) -{ - event.event_ = nullptr; -} - -CLEvent &CLEvent::operator=(CLEvent &&event) -{ - if (this != &event) - { - Release(); - std::swap(event_, event.event_); - name_ = std::move(event.name_); - } - return *this; -} - -uint64_t CLEvent::GetStartedTimeNs() const -{ - cl_ulong time_ns; - clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &time_ns, nullptr); - return time_ns; -} - -uint64_t CLEvent::GetFinishedTimeNs() const -{ - cl_ulong time_ns; - clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &time_ns, nullptr); - return time_ns; -} - -double CLEvent::GetEventTimeMs() const -{ - const uint64_t start = GetStartedTimeNs(); - const uint64_t end = GetFinishedTimeNs(); - const uint64_t time_ns = (end - start); - - return static_cast<double>(time_ns) * 1e-6; -} - -uint64_t CLEvent::GetEventTimeNs() const { return GetFinishedTimeNs() - GetStartedTimeNs(); } - -void CLEvent::SetName(const std::string &name) { name_ = name; } - -void CLEvent::Wait() const { clWaitForEvents(1, &event_); } - -CLEvent::~CLEvent() { Release(); } - -void CLEvent::Release() -{ - if (event_) - { - clReleaseEvent(event_); - event_ = nullptr; - } -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClEvent.h b/runtime/onert/backend/gpu_cl/open_cl/ClEvent.h deleted file mode 100644 index 265409ffe..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClEvent.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2021 
Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_EVENT_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_EVENT_H__ - -#include <cstdint> -#include <string> - -#include "OpenclWrapper.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -// A RAII wrapper around opencl event -class CLEvent -{ -public: - CLEvent() {} - explicit CLEvent(cl_event event); - - // Move only - CLEvent(CLEvent &&event); - CLEvent &operator=(CLEvent &&event); - CLEvent(const CLEvent &) = delete; - CLEvent &operator=(const CLEvent &) = delete; - - ~CLEvent(); - - uint64_t GetStartedTimeNs() const; - uint64_t GetFinishedTimeNs() const; - - double GetEventTimeMs() const; - uint64_t GetEventTimeNs() const; - - void Wait() const; - - cl_event event() const { return event_; } - - bool is_valid() const { return event_ != nullptr; } - - void SetName(const std::string &name); - std::string GetName() const { return name_; } - -private: - void Release(); - - cl_event event_ = nullptr; - - std::string name_; // optional, for profiling mostly -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_EVENT_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClImageFormat.cc b/runtime/onert/backend/gpu_cl/open_cl/ClImageFormat.cc deleted file mode 100644 index 247a63d39..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClImageFormat.cc +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "ClImageFormat.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -cl_channel_order ToChannelOrder(int num_channels) -{ - switch (num_channels) - { - case 1: - return CL_R; - case 2: - return CL_RG; - case 3: - return CL_RGB; - case 4: - return CL_RGBA; - default: - return -1; - } -} - -cl_channel_type ToImageChannelType(DataType data_type) -{ - switch (data_type) - { - case DataType::FLOAT32: - return CL_FLOAT; - case DataType::FLOAT16: - return CL_HALF_FLOAT; - default: - return -1; - } -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClImageFormat.h b/runtime/onert/backend/gpu_cl/open_cl/ClImageFormat.h deleted file mode 100644 index a763746bd..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClImageFormat.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_IMAGE_FORMAT_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_IMAGE_FORMAT_H__ - -#include "OpenclWrapper.h" -#include "DataType.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -cl_channel_order ToChannelOrder(int num_channels); - -cl_channel_type ToImageChannelType(DataType data_type); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_IMAGE_FORMAT_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClKernel.cc b/runtime/onert/backend/gpu_cl/open_cl/ClKernel.cc deleted file mode 100644 index f7745b9ac..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClKernel.cc +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "ClKernel.h" - -#include "absl/strings/str_cat.h" -#include "ClProgram.h" -#include "Util.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -absl::Status GetKernelMaxWorkGroupSize(cl_kernel kernel, cl_device_id device_id, int *result) -{ - size_t max_work_group_size; - cl_int error_code = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, - sizeof(size_t), &max_work_group_size, nullptr); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to get info CL_KERNEL_WORK_GROUP_SIZE ", - CLErrorCodeToString(error_code))); - } - *result = static_cast<int>(max_work_group_size); - return absl::OkStatus(); -} - -absl::Status GetKernelPrivateMemorySize(cl_kernel kernel, cl_device_id device_id, int *result) -{ - cl_ulong private_mem_size; - cl_int error_code = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_PRIVATE_MEM_SIZE, - sizeof(cl_ulong), &private_mem_size, nullptr); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to get info CL_KERNEL_PRIVATE_MEM_SIZE ", - CLErrorCodeToString(error_code))); - } - *result = static_cast<int>(private_mem_size); - return absl::OkStatus(); -} - -} // namespace - -CLKernel::CLKernel(CLKernel &&kernel) - : info_(kernel.info_), binding_counter_(kernel.binding_counter_), - function_name_(std::move(kernel.function_name_)), program_(kernel.program_), - kernel_(kernel.kernel_) -{ - kernel.kernel_ = nullptr; -} - -CLKernel &CLKernel::operator=(CLKernel &&kernel) -{ - if (this != &kernel) - { - Release(); - std::swap(info_, kernel.info_); - std::swap(binding_counter_, kernel.binding_counter_); - function_name_ = std::move(kernel.function_name_); - std::swap(program_, kernel.program_); - std::swap(kernel_, kernel.kernel_); - } - return *this; -} - -CLKernel::~CLKernel() { Release(); } - -absl::Status CLKernel::ReInit() const -{ - clReleaseKernel(kernel_); - cl_kernel *kern_ptr = const_cast<cl_kernel *>(&kernel_); - int error_code; - *kern_ptr = clCreateKernel(program_, function_name_.c_str(), &error_code); - if (!kernel_ || error_code != CL_SUCCESS) - { - *kern_ptr = nullptr; - return absl::UnknownError( - absl::StrCat("Failed to create ", function_name_, CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); -} - -void CLKernel::Release() -{ - if (kernel_) - { - clReleaseKernel(kernel_); - clReleaseProgram(program_); - kernel_ = nullptr; - } -} - -absl::Status CLKernel::CreateFromProgram(const CLProgram &program, const std::string &function_name) -{ - int error_code; - function_name_ = function_name; - kernel_ = clCreateKernel(program.program(), function_name.c_str(), &error_code); - if (!kernel_ || error_code != CL_SUCCESS) - { - kernel_ = nullptr; - return absl::UnknownError( - absl::StrCat("Failed to create ", function_name, CLErrorCodeToString(error_code))); - } - - program_ = program.program(); - clRetainProgram(program_); - - RETURN_IF_ERROR( - GetKernelPrivateMemorySize(kernel_, program.GetDeviceId(), &info_.private_memory_size)); - RETURN_IF_ERROR( - GetKernelMaxWorkGroupSize(kernel_, program.GetDeviceId(), &info_.max_work_group_size)); - return absl::OkStatus(); -} - -absl::Status CLKernel::SetMemory(int index, cl_mem memory) -{ - return SetBytes(index, &memory, sizeof(cl_mem)); -} - -absl::Status CLKernel::SetMemoryAuto(cl_mem memory) -{ - return SetBytesAuto(&memory, sizeof(cl_mem)); -} - -absl::Status CLKernel::SetBytes(int index, const void *ptr, int length) const -{ - const int 
error_code = clSetKernelArg(kernel_, index, length, ptr); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError( - absl::StrCat("Failed to set kernel arguments - ", CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); -} - -absl::Status CLKernel::SetBytesAuto(const void *ptr, int length) -{ - const int error_code = clSetKernelArg(kernel_, binding_counter_, length, ptr); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to set kernel arguments - ", - CLErrorCodeToString(error_code), "(at index - ", - binding_counter_, ")")); - } - binding_counter_++; - return absl::OkStatus(); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClKernel.h b/runtime/onert/backend/gpu_cl/open_cl/ClKernel.h deleted file mode 100644 index 9575b7946..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClKernel.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_KERNEL_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_KERNEL_H__ - -#include <string> - -#include "ClContext.h" -#include "ClDevice.h" -#include "ClProgram.h" -#include "OpenclWrapper.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -struct KernelInfo -{ - int private_memory_size = 0; - int max_work_group_size = 0; -}; - -// Arguments binding to CLKernel can be manual or automatic -// In manual you specify binding index explicitly -// In automatic binding, index auto-incremented with every binding call -// Also, if you use automatic mode you must call ResetBindingCounter -// before parameters binding -class CLKernel -{ -public: - CLKernel() {} - - // Move only - CLKernel(CLKernel &&kernel); - CLKernel &operator=(CLKernel &&kernel); - CLKernel(const CLKernel &) = delete; - CLKernel &operator=(const CLKernel &) = delete; - - ~CLKernel(); - - cl_kernel kernel() const { return kernel_; } - - absl::Status CreateFromProgram(const CLProgram &program, const std::string &function_name); - - absl::Status SetMemory(int index, cl_mem memory); - absl::Status SetMemoryAuto(cl_mem memory); - template <typename T> absl::Status SetBytes(int index, const T &value) const - { - return SetBytes(index, static_cast<const void *>(&value), sizeof(T)); - } - template <typename T> absl::Status SetBytesAuto(const T &value) - { - return SetBytesAuto(static_cast<const void *>(&value), sizeof(T)); - } - - int GetBindingCounter() const { return binding_counter_; } - void ResetBindingCounter() { binding_counter_ = 0; } - - // Do not use this function - // workaround for Mali memory leak - absl::Status ReInit() const; - - KernelInfo info_; - -private: - void Release(); - absl::Status SetBytes(int index, const void *ptr, int length) const; - absl::Status SetBytesAuto(const void *ptr, int length); 
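// A minimal usage sketch of the automatic binding mode described above
// (illustrative only; the program, kernel name, buffers, and scalar are
// hypothetical, not part of the original source):
//
//   CLKernel kernel;
//   RETURN_IF_ERROR(kernel.CreateFromProgram(program, "my_kernel"));
//   kernel.ResetBindingCounter();
//   RETURN_IF_ERROR(kernel.SetMemoryAuto(src_buffer));    // becomes arg 0
//   RETURN_IF_ERROR(kernel.SetMemoryAuto(dst_buffer));    // becomes arg 1
//   RETURN_IF_ERROR(kernel.SetBytesAuto(element_count));  // becomes arg 2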
- - int binding_counter_ = -1; - - std::string function_name_ = ""; - // reference to program from which kernel was created - cl_program program_ = nullptr; - cl_kernel kernel_ = nullptr; -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_KERNEL_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClMemory.cc b/runtime/onert/backend/gpu_cl/open_cl/ClMemory.cc deleted file mode 100644 index fd3bc5579..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClMemory.cc +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ClMemory.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -cl_mem_flags ToClMemFlags(AccessType access_type) -{ - switch (access_type) - { - case AccessType::READ: - return CL_MEM_READ_ONLY; - case AccessType::WRITE: - return CL_MEM_WRITE_ONLY; - case AccessType::READ_WRITE: - return CL_MEM_READ_WRITE; - default: - throw std::runtime_error("Invalid AccessType"); - } - - return CL_MEM_READ_ONLY; // unreachable -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClMemory.h b/runtime/onert/backend/gpu_cl/open_cl/ClMemory.h deleted file mode 100644 index c704ec71f..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClMemory.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_MEMORY_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_MEMORY_H__ - -#include <algorithm> - -#include "OpenclWrapper.h" -#include "AccessType.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -// RAII wrapper for OpenCL memory object. -// -// Image is moveable but not copyable. -class CLMemory -{ -public: - // Creates invalid object. 
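// (Default construction yields exactly such an invalid wrapper: no cl_mem,
// no ownership.) A minimal ownership sketch, assuming `buf` came from a
// successful clCreateBuffer call elsewhere:
//
//   CLMemory owned(buf, /*has_ownership=*/true); // clReleaseMemObject runs on destruction
//   cl_mem raw = owned.Release();                // detaches; the caller owns `buf` again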
- CLMemory() : CLMemory(nullptr, false) {}
-
- CLMemory(cl_mem memory, bool has_ownership) : memory_(memory), has_ownership_(has_ownership) {}
-
- // Move-only
- CLMemory(const CLMemory &) = delete;
- CLMemory &operator=(const CLMemory &) = delete;
- CLMemory(CLMemory &&image) : memory_(image.memory_), has_ownership_(image.has_ownership_)
- {
- image.memory_ = nullptr;
- }
-
- ~CLMemory() { Invalidate(); }
-
- CLMemory &operator=(CLMemory &&image)
- {
- if (this != &image)
- {
- Invalidate();
- std::swap(memory_, image.memory_);
- has_ownership_ = image.has_ownership_;
- }
- return *this;
- }
-
- cl_mem memory() const { return memory_; }
-
- bool is_valid() const { return memory_ != nullptr; }
-
- // @return true if this object actually owns the corresponding CL memory
- // and manages its lifetime.
- bool has_ownership() const { return has_ownership_; }
-
- cl_mem Release()
- {
- cl_mem to_return = memory_;
- memory_ = nullptr;
- return to_return;
- }
-
-private:
- void Invalidate()
- {
- if (memory_ && has_ownership_)
- {
- clReleaseMemObject(memory_);
- }
- memory_ = nullptr;
- }
-
- cl_mem memory_ = nullptr;
- bool has_ownership_ = false;
-};
-
-cl_mem_flags ToClMemFlags(AccessType access_type);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_MEMORY_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClProgram.cc b/runtime/onert/backend/gpu_cl/open_cl/ClProgram.cc
deleted file mode 100644
index c72b01a73..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/ClProgram.cc
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -#include "ClProgram.h" - -#include <cstdint> -#include <cstring> -#include <vector> - -#include "absl/strings/str_cat.h" -#include "absl/types/span.h" -#include "Util.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -std::string GetProgramBuildInfo(cl_program program, cl_device_id id, cl_program_build_info info) -{ - size_t size; - cl_int error_code = clGetProgramBuildInfo(program, id, info, 0, nullptr, &size); - if (error_code != CL_SUCCESS) - { - return absl::StrCat("Failed to GetProgramBuildInfo - ", CLErrorCodeToString(error_code)); - } - - std::string result(size - 1, 0); - error_code = clGetProgramBuildInfo(program, id, info, size, &result[0], nullptr); - if (error_code != CL_SUCCESS) - { - return absl::StrCat("Failed to GetProgramBuildInfo - ", CLErrorCodeToString(error_code)); - } - return result; -} - -absl::Status GetBinarySize(cl_program program, size_t *binary_size) -{ - cl_int error_code = - clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), binary_size, nullptr); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError( - absl::StrCat("Failed to get program binary size - ", CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); -} - -absl::Status BuildProgram(cl_program program, const CLDevice &device, - const std::string &compiler_options) -{ - const int error_code = - clBuildProgram(program, 0, nullptr, compiler_options.c_str(), nullptr, nullptr); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError( - absl::StrCat("Failed to build program executable - ", CLErrorCodeToString(error_code), - GetProgramBuildInfo(program, device.id(), CL_PROGRAM_BUILD_LOG))); - } - - return absl::OkStatus(); -} - -std::string CompilerOptionToString(const CLDevice &device, CompilerOptions option) -{ - switch (option) - { - case CompilerOptions::ADRENO_FULL_SIMD_LINE: - if (device.info_.adreno_info.gpu_version < 500) - { - return "-qcom-accelerate-16-bit"; - } - else - { - return "-qcom-accelerate-16-bit=true"; - } - case CompilerOptions::ADRENO_MORE_WAVES: - if (device.info_.adreno_info.gpu_version >= 500) - { - return "-qcom-accelerate-16-bit=false"; - } - else - { - return ""; - } - case CompilerOptions::POWERVR_FP16: - return "-cl-fast-relaxed-math"; - case CompilerOptions::CL_OPT_DISABLE: - return "-cl-opt-disable"; - case CompilerOptions::CL_2_0: - return "-cl-std=CL2.0"; - case CompilerOptions::CL_3_0: - return "-cl-std=CL3.0"; - } - return ""; -} - -} // namespace - -std::string CompilerOptionsToString(const CLDevice &device, - const std::vector<CompilerOptions> &compiler_options) -{ - std::string result; - for (auto option : compiler_options) - { - absl::StrAppend(&result, CompilerOptionToString(device, option), " "); - } - return result; -} - -CLProgram::CLProgram(cl_program program, cl_device_id device_id) - : program_(program), device_id_(device_id) -{ -} - -CLProgram::CLProgram(CLProgram &&program) - : program_(program.program_), device_id_(program.device_id_) -{ - program.program_ = nullptr; -} - -CLProgram &CLProgram::operator=(CLProgram &&program) -{ - if (this != &program) - { - Release(); - std::swap(program_, program.program_); - std::swap(device_id_, program.device_id_); - } - return *this; -} - -CLProgram::~CLProgram() { Release(); } - -void CLProgram::Release() -{ - if (program_) - { - clReleaseProgram(program_); - program_ = nullptr; - } -} - -absl::Status CLProgram::GetBinary(std::vector<uint8_t> *result) const -{ - size_t binary_size; - 
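// The calls below follow the standard two-step OpenCL query pattern: first
// ask for the binary size (CL_PROGRAM_BINARY_SIZES, via GetBinarySize above),
// then fetch CL_PROGRAM_BINARIES into a buffer of exactly that size.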
RETURN_IF_ERROR(GetBinarySize(program_, &binary_size)); - result->resize(result->size() + binary_size); - uint8_t *binary_ptr = result->data() + result->size() - binary_size; - cl_int error_code = - clGetProgramInfo(program_, CL_PROGRAM_BINARIES, binary_size, &binary_ptr, nullptr); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError( - absl::StrCat("Failed to get program binary - ", CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); -} - -absl::Status CreateCLProgram(const std::string &code, const std::string &compiler_options, - const CLContext &context, const CLDevice &device, CLProgram *result) -{ - int error_code; - const char *source = code.c_str(); - - cl_program program = - clCreateProgramWithSource(context.context(), 1, &source, nullptr, &error_code); - if (!program || error_code != CL_SUCCESS) - { - return absl::UnknownError( - absl::StrCat("Failed to create compute program - ", CLErrorCodeToString(error_code))); - } - - *result = CLProgram(program, device.id()); - RETURN_IF_ERROR(BuildProgram(program, device, compiler_options)); - return absl::OkStatus(); -} - -absl::Status CreateCLProgramFromBinary(const CLContext &context, const CLDevice &device, - absl::Span<const uint8_t> binary, CLProgram *result) -{ - cl_int binary_status; - cl_int error_code; - cl_device_id devices_list[] = {device.id()}; - size_t binary_size = binary.size(); - const uint8_t *binary_pointer = binary.data(); - cl_program program = clCreateProgramWithBinary(context.context(), 1, devices_list, &binary_size, - &binary_pointer, &binary_status, &error_code); - if (binary_status != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat( - "Something wrong with binary after clCreateProgramWithBinary - ", binary_status)); - } - if (error_code != CL_SUCCESS) - { - return absl::UnknownError( - absl::StrCat("Failed to create program - ", CLErrorCodeToString(error_code))); - } - *result = CLProgram(program, device.id()); - return BuildProgram(program, device, ""); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClProgram.h b/runtime/onert/backend/gpu_cl/open_cl/ClProgram.h deleted file mode 100644 index d039ff698..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClProgram.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_CL_PROGRAM_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_CL_PROGRAM_H__ - -#include <cstdint> -#include <vector> - -#include "ClContext.h" -#include "ClDevice.h" -#include "OpenclWrapper.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -enum class CompilerOptions -{ - // ADRENO_FULL_SIMD_LINE: - // Adreno can have 2 sizes for SIMD size. - // On Adreno 4xx/5xx it is 32/64, on Adreno6xx it is 64/128. 
- // Some of our algorithms actually rely on the exact size, for example on
- // the full SIMD size, so we need this define.
- // This define is actually -qcom-accelerate-16-bit, but it controls SIMD
- // size.
- ADRENO_FULL_SIMD_LINE,
- ADRENO_MORE_WAVES,
- POWERVR_FP16,
- CL_OPT_DISABLE,
- CL_2_0,
- CL_3_0,
-};
-
-std::string CompilerOptionsToString(const CLDevice &device,
- const std::vector<CompilerOptions> &compiler_options);
-
-class CLProgram
-{
-public:
- CLProgram() {}
- CLProgram(cl_program program, cl_device_id device_id);
-
- // Move only
- CLProgram(CLProgram &&program);
- CLProgram &operator=(CLProgram &&program);
- CLProgram(const CLProgram &) = delete;
- CLProgram &operator=(const CLProgram &) = delete;
-
- ~CLProgram();
-
- cl_program program() const { return program_; }
-
- // Return the cl_device_id associated with the program object.
- // This can be the device associated with the context on which the program
- // object was created, or the device that was specified when the program
- // object was created using clCreateProgramWithBinary.
- cl_device_id GetDeviceId() const { return device_id_; }
-
- absl::Status GetBinary(std::vector<uint8_t> *result) const;
-
-private:
- void Release();
-
- cl_program program_ = nullptr;
-
- // reference
- cl_device_id device_id_ = nullptr;
-};
-
-absl::Status CreateCLProgram(const std::string &code, const std::string &compiler_options,
- const CLContext &context, const CLDevice &device, CLProgram *result);
-
-absl::Status CreateCLProgramFromBinary(const CLContext &context, const CLDevice &device,
- absl::Span<const uint8_t> binary, CLProgram *result);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_CL_PROGRAM_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/DataType.cc b/runtime/onert/backend/gpu_cl/open_cl/DataType.cc
deleted file mode 100644
index ce2aa8298..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/DataType.cc
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -#include "DataType.h" - -#include <stddef.h> -#include <string> - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -size_t SizeOf(DataType data_type) -{ - switch (data_type) - { - case DataType::UINT8: - case DataType::INT8: - return 1; - case DataType::FLOAT16: - case DataType::INT16: - case DataType::UINT16: - return 2; - case DataType::FLOAT32: - case DataType::INT32: - case DataType::UINT32: - return 4; - case DataType::FLOAT64: - case DataType::INT64: - case DataType::UINT64: - return 8; - case DataType::UNKNOWN: - return 0; - } - return 0; -} - -std::string ToString(DataType data_type) -{ - switch (data_type) - { - case DataType::FLOAT16: - return "float16"; - case DataType::FLOAT32: - return "float32"; - case DataType::FLOAT64: - return "float64"; - case DataType::INT16: - return "int16"; - case DataType::INT32: - return "int32"; - case DataType::INT64: - return "int64"; - case DataType::INT8: - return "int8"; - case DataType::UINT16: - return "uint16"; - case DataType::UINT32: - return "uint32"; - case DataType::UINT64: - return "uint64"; - case DataType::UINT8: - return "uint8"; - case DataType::UNKNOWN: - return "unknown"; - } - return "undefined"; -} - -std::string ToCLDataType(DataType data_type, int vec_size) -{ - const std::string postfix = vec_size == 1 ? "" : std::to_string(vec_size); - switch (data_type) - { - case DataType::FLOAT16: - return "half" + postfix; - case DataType::FLOAT32: - return "float" + postfix; - case DataType::FLOAT64: - return "double" + postfix; - case DataType::INT16: - return "short" + postfix; - case DataType::INT32: - return "int" + postfix; - case DataType::INT64: - return "long" + postfix; - case DataType::INT8: - return "char" + postfix; - case DataType::UINT16: - return "ushort" + postfix; - case DataType::UINT32: - return "uint" + postfix; - case DataType::UINT64: - return "ulong" + postfix; - case DataType::UINT8: - return "uchar" + postfix; - case DataType::UNKNOWN: - return "unknown"; - } - return "undefined"; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/DataType.h b/runtime/onert/backend/gpu_cl/open_cl/DataType.h deleted file mode 100644 index 2a5afd551..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/DataType.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_DATA_TYPE_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_DATA_TYPE_H__ - -#include <stddef.h> -#include <string> - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -enum class DataType -{ - UNKNOWN = 0, - FLOAT16 = 1, - FLOAT32 = 2, - FLOAT64 = 3, - UINT8 = 4, - INT8 = 5, - UINT16 = 6, - INT16 = 7, - UINT32 = 8, - INT32 = 9, - UINT64 = 10, - INT64 = 11, -}; - -size_t SizeOf(DataType type); - -std::string ToString(DataType t); - -std::string ToCLDataType(DataType data_type, int vec_size = 1); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_DATA_TYPE_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/DeviceInfo.cc b/runtime/onert/backend/gpu_cl/open_cl/DeviceInfo.cc deleted file mode 100644 index 2966fad75..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/DeviceInfo.cc +++ /dev/null @@ -1,383 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "DeviceInfo.h" - -#include <algorithm> -#include <map> -#include <string> -#include <vector> - -#include "absl/strings/numbers.h" -#include "absl/strings/str_split.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -namespace -{ -// check that gpu_version belong to range min_version-max_version -// min_version is included and max_version is excluded. -bool IsGPUVersionInRange(int gpu_version, int min_version, int max_version) -{ - return gpu_version >= min_version && gpu_version < max_version; -} - -MaliGPU GetMaliGPUVersion(const std::string &device_name) -{ - const std::map<std::string, MaliGPU> kMapping = { - {"T604", MaliGPU::T604}, {"T622", MaliGPU::T622}, {"T624", MaliGPU::T624}, - {"T628", MaliGPU::T628}, {"T658", MaliGPU::T658}, {"T678", MaliGPU::T678}, - {"T720", MaliGPU::T720}, {"T760", MaliGPU::T760}, {"T820", MaliGPU::T820}, - {"T830", MaliGPU::T830}, {"T860", MaliGPU::T860}, {"T880", MaliGPU::T880}, - {"G31", MaliGPU::G31}, {"G51", MaliGPU::G51}, {"G71", MaliGPU::G71}, - {"G52", MaliGPU::G52}, {"G72", MaliGPU::G72}, {"G76", MaliGPU::G76}, - {"G57", MaliGPU::G57}, {"G77", MaliGPU::G77}, {"G68", MaliGPU::G68}, - {"G78", MaliGPU::G78}, - }; - for (const auto &v : kMapping) - { - if (device_name.find(v.first) != std::string::npos) - { - return v.second; - } - } - return MaliGPU::UNKNOWN; -} - -} // namespace - -// There is no rule for gpu version encoding, but we found these samples: -// Version: OpenCL C 2.0 Adreno(TM) 540 // Pixel 2 -// Version: OpenCL C 2.0 Adreno(TM) 630 // Sony Compact XZ2 -// Version: OpenCL C 2.0 Adreno(TM) 630 // Pixel 3 -// Version: OpenCL C 2.0 Adreno(TM) 540 // Samsung S8 -// Version: OpenCL C 1.2 Adreno(TM) 430 // HTC One M9 -// Version: OpenCL C 2.0 Adreno(TM) 530 // Samsung S7 Edge -// Version: OpenCL C 1.2 Adreno(TM) 405 // Motorola Moto G(4) -// After the number string ends. 
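// e.g. from "OpenCL C 2.0 Adreno(TM) 540" the parsed version would be 540.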
-// It is assumed that the <vendor-specific information> for Adreno GPUs has -// the following format: -// <text?><space?>Adreno(TM)<space><text?><version> -// Returns -1 if vendor-specific information cannot be parsed -int GetAdrenoGPUVersion(const std::string &gpu_version) -{ - const std::string gpu = absl::AsciiStrToLower(gpu_version); - const std::vector<absl::string_view> words = absl::StrSplit(gpu, ' '); - size_t i = 0; - for (; i < words.size(); ++i) - { - if (words[i].find("adreno") != words[i].npos) - { - break; - } - } - i += 1; - for (; i < words.size(); ++i) - { - int number; - bool is_number = absl::SimpleAtoi(words[i], &number); - // Adreno GPUs starts from 2xx, but opencl support should be only from 3xx - if (is_number && number >= 300) - { - return number; - } - } - return -1; -} - -std::string VendorToString(Vendor v) -{ - switch (v) - { - case Vendor::kQualcomm: - return "Qualcomm"; - case Vendor::kMali: - return "Mali"; - case Vendor::kPowerVR: - return "PowerVR"; - case Vendor::kNvidia: - return "NVIDIA"; - case Vendor::kAMD: - return "AMD"; - case Vendor::kIntel: - return "Intel"; - case Vendor::kUnknown: - return "unknown vendor"; - default: - return "Error"; - } -} - -std::string OpenCLVersionToString(OpenCLVersion version) -{ - switch (version) - { - case OpenCLVersion::CL_1_0: - return "1.0"; - case OpenCLVersion::CL_1_1: - return "1.1"; - case OpenCLVersion::CL_1_2: - return "1.2"; - case OpenCLVersion::CL_2_0: - return "2.0"; - case OpenCLVersion::CL_2_1: - return "2.1"; - case OpenCLVersion::CL_2_2: - return "2.2"; - case OpenCLVersion::CL_3_0: - return "3.0"; - default: - return "Error"; - } -} - -AdrenoInfo::AdrenoInfo(const std::string &device_version) - : gpu_version(GetAdrenoGPUVersion(device_version)) -{ -} - -int AdrenoInfo::GetMaximumWavesCount() const -{ - if (gpu_version < 400) - { - return -1; // Adreno 3xx does not support it currently - } - else if (gpu_version >= 400 && gpu_version < 500) - { - return -1; // Adreno 4xx does not support it currently - } - else if (gpu_version >= 500 && gpu_version < 600) - { - return -1; // Adreno 5xx does not support it currently - } - else if (gpu_version >= 600 && gpu_version < 700) - { - return gpu_version == 640 ? 30 : 16; - } - else - { - return -1; // Adreno 7xx and higher does not exist yet - } -} - -int AdrenoInfo::GetRegisterMemorySizePerComputeUnit() const -{ - if (gpu_version < 400) - { - return -1; // Adreno 3xx does not support it currently - } - else if (gpu_version >= 400 && gpu_version < 500) - { - return -1; // Adreno 4xx does not support it currently - } - else if (gpu_version >= 500 && gpu_version < 600) - { - return -1; // Adreno 5xx does not support it currently - } - else if (gpu_version >= 600 && gpu_version < 700) - { - return gpu_version == 640 ? 128 * 144 * 16 : 128 * 96 * 16; - } - else - { - return -1; // Adreno 7xx and higher does not exist yet - } -} - -int AdrenoInfo::GetMaximumWavesCount(int register_footprint_per_tread, bool full_wave) const -{ - const int register_usage_per_wave = GetWaveSize(full_wave) * register_footprint_per_tread; - const int possible_waves_count = GetRegisterMemorySizePerComputeUnit() / register_usage_per_wave; - return std::min(possible_waves_count, GetMaximumWavesCount()); -} - -int AdrenoInfo::GetWaveSize(bool full_wave) const -{ - if (gpu_version < 400) - { - return -1; // Adreno 3xx does not support it currently - } - else if (gpu_version < 600) - { - return full_wave ? 64 : 32; - } - else - { - return full_wave ? 
128 : 64; - } -} - -MaliInfo::MaliInfo(const std::string &device_name) : gpu_version(GetMaliGPUVersion(device_name)) {} - -bool MaliInfo::IsMaliT6xx() const -{ - return gpu_version == MaliGPU::T604 || gpu_version == MaliGPU::T622 || - gpu_version == MaliGPU::T624 || gpu_version == MaliGPU::T628 || - gpu_version == MaliGPU::T658 || gpu_version == MaliGPU::T678; -} - -bool MaliInfo::IsMaliT7xx() const -{ - return gpu_version == MaliGPU::T720 || gpu_version == MaliGPU::T760; -} - -bool MaliInfo::IsMaliT8xx() const -{ - return gpu_version == MaliGPU::T820 || gpu_version == MaliGPU::T830 || - gpu_version == MaliGPU::T860 || gpu_version == MaliGPU::T880; -} - -bool MaliInfo::IsMidgard() const { return IsMaliT6xx() || IsMaliT7xx() || IsMaliT8xx(); } - -bool MaliInfo::IsBifrostGen1() const -{ - return gpu_version == MaliGPU::G31 || gpu_version == MaliGPU::G51 || gpu_version == MaliGPU::G71; -} - -bool MaliInfo::IsBifrostGen2() const -{ - return gpu_version == MaliGPU::G52 || gpu_version == MaliGPU::G72; -} - -bool MaliInfo::IsBifrostGen3() const { return gpu_version == MaliGPU::G76; } - -bool MaliInfo::IsBifrost() const { return IsBifrostGen1() || IsBifrostGen2() || IsBifrostGen3(); } - -bool MaliInfo::IsValhall() const -{ - return gpu_version == MaliGPU::G57 || gpu_version == MaliGPU::G77 || - gpu_version == MaliGPU::G68 || gpu_version == MaliGPU::G78; -} - -bool DeviceInfo::SupportsTextureArray() const { return cl_version >= OpenCLVersion::CL_1_2; } - -bool DeviceInfo::SupportsImageBuffer() const { return cl_version >= OpenCLVersion::CL_1_2; } - -bool DeviceInfo::SupportsImage3D() const -{ - if (vendor == Vendor::kMali) - { - // On Mali T880 read_imageh doesn't compile with image3d_t - return false; - } - return supports_image3d_writes; -} - -bool DeviceInfo::SupportsFloatImage2D(DataType data_type, int channels) const -{ - if (channels == 1) - { - return data_type == DataType::FLOAT32 ? supports_r_f32_tex2d : supports_r_f16_tex2d; - } - else if (channels == 2) - { - return data_type == DataType::FLOAT32 ? supports_rg_f32_tex2d : supports_rg_f16_tex2d; - } - else if (channels == 3) - { - return data_type == DataType::FLOAT32 ? supports_rgb_f32_tex2d : supports_rgb_f16_tex2d; - } - else if (channels == 4) - { - return data_type == DataType::FLOAT32 ? 
supports_rgba_f32_tex2d : supports_rgba_f16_tex2d; - } - else - { - return false; - } -} - -bool DeviceInfo::SupportsOneLayerTextureArray() const -{ - return !IsAdreno() || adreno_info.support_one_layer_texture_array; -} - -bool DeviceInfo::SupportsExtension(const std::string &extension) const -{ - for (const auto &ext : extensions) - { - if (ext == extension) - { - return true; - } - } - return false; -} - -bool DeviceInfo::IsCL20OrHigher() const -{ - return cl_version != OpenCLVersion::CL_1_0 && cl_version != OpenCLVersion::CL_1_1 && - cl_version != OpenCLVersion::CL_1_2; -} - -bool DeviceInfo::SupportsSubGroupWithSize(int sub_group_size) const -{ - for (auto subgroup_size : supported_subgroup_sizes) - { - if (sub_group_size == subgroup_size) - { - return true; - } - } - return false; -} - -bool DeviceInfo::IsAdreno() const { return vendor == Vendor::kQualcomm; } - -bool DeviceInfo::IsAdreno3xx() const -{ - return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 300, 400); -} - -bool DeviceInfo::IsAdreno4xx() const -{ - return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 400, 500); -} - -bool DeviceInfo::IsAdreno5xx() const -{ - return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 500, 600); -} - -bool DeviceInfo::IsAdreno6xx() const -{ - return IsAdreno() && IsGPUVersionInRange(adreno_info.gpu_version, 600, 700); -} - -bool DeviceInfo::IsAdreno6xxOrHigher() const -{ - return IsAdreno() && adreno_info.gpu_version >= 600; -} - -bool DeviceInfo::IsPowerVR() const { return vendor == Vendor::kPowerVR; } - -bool DeviceInfo::IsNvidia() const { return vendor == Vendor::kNvidia; } - -bool DeviceInfo::IsMali() const { return vendor == Vendor::kMali; } - -bool DeviceInfo::IsAMD() const { return vendor == Vendor::kAMD; } - -bool DeviceInfo::IsIntel() const { return vendor == Vendor::kIntel; } - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/DeviceInfo.h b/runtime/onert/backend/gpu_cl/open_cl/DeviceInfo.h deleted file mode 100644 index 85d7d4c80..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/DeviceInfo.h +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_DEVICE_INFO_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_DEVICE_INFO_H__ - -#include <string> -#include <vector> - -#include "DataType.h" - -// for use only in device_info.cc, but keep here to make tests -int GetAdrenoGPUVersion(const std::string &gpu_version); - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -enum class Vendor -{ - kQualcomm, - kMali, - kPowerVR, - kNvidia, - kAMD, - kIntel, - kUnknown -}; -std::string VendorToString(Vendor v); - -enum class OpenCLVersion -{ - UNKNOWN, - CL_1_0, - CL_1_1, - CL_1_2, - CL_2_0, - CL_2_1, - CL_2_2, - CL_3_0 -}; -std::string OpenCLVersionToString(OpenCLVersion version); - -struct AdrenoInfo -{ - AdrenoInfo() = default; - explicit AdrenoInfo(const std::string &device_version); - int gpu_version = -1; // can be, for example, 405/430/540/530/630 etc. - - // This function returns some not very documented physical parameter of - // Adreno6xx GPU. - // We obtained it using Snapdragon Profiler. - int GetMaximumWavesCount() const; - - // returns amount of register memory per CU(Compute Unit) in bytes. - int GetRegisterMemorySizePerComputeUnit() const; - - // returns maximum possible amount of waves based on register usage. - int GetMaximumWavesCount(int register_footprint_per_tread, bool full_wave = true) const; - - int GetWaveSize(bool full_wave) const; - - // Not supported on some Adreno devices with specific driver version. - // b/131099086 - bool support_one_layer_texture_array = true; -}; - -enum class MaliGPU -{ - T604, - T622, - T624, - T628, - T658, - T678, - T720, - T760, - T820, - T830, - T860, - T880, - G31, - G51, - G71, - G52, - G72, - G76, - G57, - G77, - G68, - G78, - UNKNOWN -}; - -struct MaliInfo -{ - MaliInfo() = default; - explicit MaliInfo(const std::string &device_name); - MaliGPU gpu_version = MaliGPU::UNKNOWN; - - bool IsMaliT6xx() const; - bool IsMaliT7xx() const; - bool IsMaliT8xx() const; - bool IsMidgard() const; - bool IsBifrostGen1() const; - bool IsBifrostGen2() const; - bool IsBifrostGen3() const; - bool IsBifrost() const; - bool IsValhall() const; -}; - -struct DeviceInfo -{ - DeviceInfo() = default; - - bool IsAdreno() const; - bool IsAdreno3xx() const; - bool IsAdreno4xx() const; - bool IsAdreno5xx() const; - bool IsAdreno6xx() const; - bool IsAdreno6xxOrHigher() const; - bool IsPowerVR() const; - bool IsNvidia() const; - bool IsMali() const; - bool IsAMD() const; - bool IsIntel() const; - - bool SupportsTextureArray() const; - bool SupportsImageBuffer() const; - bool SupportsImage3D() const; - - bool SupportsFloatImage2D(DataType data_type, int channels) const; - - // To track bug on some Adreno. 
b/131099086
- bool SupportsOneLayerTextureArray() const;
-
- bool SupportsExtension(const std::string &extension) const;
- bool IsCL20OrHigher() const;
- bool SupportsSubGroupWithSize(int sub_group_size) const;
-
- std::vector<std::string> extensions;
- bool supports_fp16 = false;
- bool supports_image3d_writes = false;
- Vendor vendor = Vendor::kUnknown;
- OpenCLVersion cl_version = OpenCLVersion::UNKNOWN;
- int compute_units_count = 0;
- uint64_t buffer_max_size = 0;
- uint64_t image2d_max_width = 0;
- uint64_t image2d_max_height = 0;
- uint64_t image_buffer_max_size = 0;
- uint64_t image_array_max_layers = 0;
- uint64_t image3d_max_width = 0;
- uint64_t image3d_max_height = 0;
- uint64_t image3d_max_depth = 0;
- int max_work_group_size_x = 0;
- int max_work_group_size_y = 0;
- int max_work_group_size_z = 0;
- std::vector<int> supported_subgroup_sizes;
-
- // rtn is ROUND_TO_NEAREST
- // with rtn, precision is much better than with rtz (ROUND_TO_ZERO)
- // Adreno 3xx supports only rtz; Adreno 4xx and newer support rtn
- // Mali from T6xx supports rtn
- // PowerVR supports only rtz
- bool supports_fp32_rtn = false;
- bool supports_fp16_rtn = false;
-
- bool supports_r_f16_tex2d = false;
- bool supports_rg_f16_tex2d = false;
- bool supports_rgb_f16_tex2d = false;
- bool supports_rgba_f16_tex2d = false;
-
- bool supports_r_f32_tex2d = false;
- bool supports_rg_f32_tex2d = false;
- bool supports_rgb_f32_tex2d = false;
- bool supports_rgba_f32_tex2d = false;
-
- AdrenoInfo adreno_info;
- MaliInfo mali_info;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_DEVICE_INFO_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Environment.cc b/runtime/onert/backend/gpu_cl/open_cl/Environment.cc
deleted file mode 100644
index b558f0377..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/Environment.cc
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -#include "Environment.h" - -#include <string> -#include <vector> - -#include "Util.h" -#include "Shape.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -Environment::Environment(CLDevice &&device, CLContext &&context, CLCommandQueue &&queue, - ProfilingCommandQueue &&profiling_queue) - : device_(std::move(device)), context_(std::move(context)), queue_(std::move(queue)), - profiling_queue_(std::move(profiling_queue)) -{ -} - -Environment::Environment(Environment &&environment) - : device_(std::move(environment.device_)), context_(std::move(environment.context_)), - queue_(std::move(environment.queue_)), - profiling_queue_(std::move(environment.profiling_queue_)), - program_cache_(std::move(environment.program_cache_)) -{ -} - -Environment &Environment::operator=(Environment &&environment) -{ - if (this != &environment) - { - device_ = std::move(environment.device_); - context_ = std::move(environment.context_); - queue_ = std::move(environment.queue_); - profiling_queue_ = std::move(environment.profiling_queue_); - program_cache_ = std::move(environment.program_cache_); - } - return *this; -} - -absl::Status Environment::Init() -{ - if (device().IsAdreno() && device().SupportsTextureArray()) - { - // Some Adreno < 600 have bug with one layer texture array. b/131099086 - // If we have one layer texture array and will write smt from kernel to this - // texture, we will get zeroes instead of actual values. - // The same kernel will work, if we use texture array with more than one - // layer. - if (device().info_.adreno_info.gpu_version < 600) - { - GetDevicePtr()->DisableOneLayerTextureArray(); - } - } - return absl::OkStatus(); -} - -void Environment::SetHighPerformance() const -{ - // TODO(sorokin) use cl_perf_hint if available -} - -void Environment::SetDefaultPerformance() const -{ - // TODO(sorokin) use cl_perf_hint if available -} - -void Environment::SetLowPerformance() const -{ - // TODO(sorokin) use cl_perf_hint if available -} - -std::vector<CalculationsPrecision> Environment::GetSupportedPrecisions() const -{ - std::vector<CalculationsPrecision> precisions; - for (CalculationsPrecision precision : - {CalculationsPrecision::F32, CalculationsPrecision::F32_F16, CalculationsPrecision::F16}) - { - if (IsSupported(precision)) - { - precisions.push_back(precision); - } - } - return precisions; -} - -bool Environment::IsSupported(CalculationsPrecision precision) const -{ - switch (precision) - { - case CalculationsPrecision::F32_F16: - case CalculationsPrecision::F16: - return device_.SupportsFP16(); - case CalculationsPrecision::F32: - return true; - } - return false; -} - -std::vector<TensorStorageType> Environment::GetSupportedStorages() const -{ - std::vector<TensorStorageType> storage_types; - for (auto storage_type : - {TensorStorageType::TEXTURE_2D, TensorStorageType::BUFFER, TensorStorageType::TEXTURE_ARRAY, - TensorStorageType::IMAGE_BUFFER, TensorStorageType::TEXTURE_3D}) - { - if (IsSupported(storage_type)) - { - storage_types.push_back(storage_type); - } - } - return storage_types; -} - -std::vector<TensorStorageType> Environment::GetSupportedStoragesWithHWZeroClampSupport() const -{ - std::vector<TensorStorageType> storage_types; - for (auto storage_type : {TensorStorageType::TEXTURE_2D, TensorStorageType::TEXTURE_ARRAY, - TensorStorageType::TEXTURE_3D}) - { - if (IsSupported(storage_type)) - { - storage_types.push_back(storage_type); - } - } - return storage_types; -} - -bool Environment::IsSupported(TensorStorageType 
storage_type) const -{ - switch (storage_type) - { - case TensorStorageType::TEXTURE_2D: - return !device_.IsAMD(); - case TensorStorageType::BUFFER: - return true; - case TensorStorageType::TEXTURE_ARRAY: - return !device_.IsAMD() && device_.SupportsTextureArray(); - case TensorStorageType::IMAGE_BUFFER: - return (device_.IsAdreno() || device_.IsAMD() || device_.IsNvidia()) && - device_.SupportsImageBuffer(); - case TensorStorageType::TEXTURE_3D: - return !device_.IsAMD() && device_.SupportsImage3D(); - case TensorStorageType::SINGLE_TEXTURE_2D: - return false; - case TensorStorageType::UNKNOWN: - return false; - } - return false; -} - -TensorStorageType GetFastestStorageType(const DeviceInfo &gpu_info) -{ - if (gpu_info.IsAdreno()) - { - if (gpu_info.IsAdreno6xxOrHigher()) - { - return TensorStorageType::TEXTURE_ARRAY; - } - else - { - return TensorStorageType::TEXTURE_2D; - } - } - else if (gpu_info.IsPowerVR()) - { - return TensorStorageType::TEXTURE_2D; - } - else if (gpu_info.IsMali()) - { - const MaliInfo mali_info = gpu_info.mali_info; - if (mali_info.IsMaliT8xx() || mali_info.IsBifrostGen3() || mali_info.IsValhall()) - { - return TensorStorageType::TEXTURE_2D; - } - else - { - return TensorStorageType::BUFFER; - } - } - else if (gpu_info.IsNvidia()) - { - return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER - : TensorStorageType::BUFFER; - } - else if (gpu_info.IsAMD()) - { - return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER - : TensorStorageType::BUFFER; - } - else if (gpu_info.IsIntel()) - { - return TensorStorageType::BUFFER; - } - return TensorStorageType::BUFFER; -} - -TensorStorageType GetStorageTypeWithMinimalMemoryConsumption(const DeviceInfo &gpu_info) -{ - if (gpu_info.IsAdreno()) - { - if (gpu_info.IsAdreno3xx() || gpu_info.IsAdreno4xx()) - { - return TensorStorageType::BUFFER; - } - else - { - return TensorStorageType::IMAGE_BUFFER; - } - } - else if (gpu_info.IsPowerVR()) - { - return TensorStorageType::BUFFER; - } - else if (gpu_info.IsMali()) - { - return TensorStorageType::BUFFER; - } - else if (gpu_info.IsNvidia()) - { - return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER - : TensorStorageType::BUFFER; - } - else if (gpu_info.IsAMD()) - { - return gpu_info.SupportsImageBuffer() ? TensorStorageType::IMAGE_BUFFER - : TensorStorageType::BUFFER; - } - else if (gpu_info.IsIntel()) - { - return TensorStorageType::BUFFER; - } - return TensorStorageType::BUFFER; -} - -absl::Status CreateEnvironment(Environment *result) -{ - CLDevice gpu; - RETURN_IF_ERROR(CreateDefaultGPUDevice(&gpu)); - - CLContext context; - RETURN_IF_ERROR(CreateCLContext(gpu, &context)); - CLCommandQueue queue; - RETURN_IF_ERROR(CreateCLCommandQueue(gpu, context, &queue)); - ProfilingCommandQueue profiling_queue; - RETURN_IF_ERROR(CreateProfilingCommandQueue(gpu, context, &profiling_queue)); - - *result = - Environment(std::move(gpu), std::move(context), std::move(queue), std::move(profiling_queue)); - return result->Init(); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/Environment.h b/runtime/onert/backend/gpu_cl/open_cl/Environment.h deleted file mode 100644 index 47866b563..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Environment.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_ENVIRONMENT_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_ENVIRONMENT_H__ - -#include "ClCommandQueue.h" -#include "ClContext.h" -#include "ClDevice.h" -#include "DeviceInfo.h" -#include "Precision.h" -#include "TensorType.h" -#include "DataType.h" -#include "ProgramCache.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -class Environment -{ -public: - Environment() = default; - explicit Environment(CLDevice &&device, CLContext &&context, CLCommandQueue &&queue, - ProfilingCommandQueue &&profiling_queue); - // Move only - Environment(Environment &&environment); - Environment &operator=(Environment &&environment); - Environment(const Environment &) = delete; - Environment &operator=(const Environment &) = delete; - - const CLDevice &device() const { return device_; } - CLDevice *GetDevicePtr() { return &device_; } - const CLDevice *GetDevicePtr() const { return &device_; } - CLContext &context() { return context_; } - CLCommandQueue *queue() { return &queue_; } - ProfilingCommandQueue *profiling_queue() { return &profiling_queue_; } - ProgramCache *program_cache() { return &program_cache_; } - const ProgramCache *program_cache() const { return &program_cache_; } - - std::vector<CalculationsPrecision> GetSupportedPrecisions() const; - bool IsSupported(CalculationsPrecision precision) const; - std::vector<TensorStorageType> GetSupportedStorages() const; - // returns storage types that support zero clamping when reading OOB in HW - // (Height/Width) dimensions. - std::vector<TensorStorageType> GetSupportedStoragesWithHWZeroClampSupport() const; - bool IsSupported(TensorStorageType storage_type) const; - - absl::Status Init(); - - void SetHighPerformance() const; - void SetDefaultPerformance() const; - void SetLowPerformance() const; // for energy saving - -private: - CLDevice device_; - CLContext context_; - CLCommandQueue queue_; - ProfilingCommandQueue profiling_queue_; - ProgramCache program_cache_; -}; - -TensorStorageType GetFastestStorageType(const DeviceInfo &gpu_info); -TensorStorageType GetStorageTypeWithMinimalMemoryConsumption(const DeviceInfo &gpu_info); - -absl::Status CreateEnvironment(Environment *result); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_ENVIRONMENT_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/GpuObject.h b/runtime/onert/backend/gpu_cl/open_cl/GpuObject.h deleted file mode 100644 index a31630235..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/GpuObject.h +++ /dev/null @@ -1,222 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_GPU_OBJECT_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_GPU_OBJECT_H__ - -#include <map> -#include <memory> -#include <string> -#include <vector> - -#include "ClContext.h" -#include "OpenclWrapper.h" -#include "AccessType.h" -#include "DataType.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -struct GPUImage2DDescriptor -{ - DataType data_type = DataType::UNKNOWN; - AccessType access_type = AccessType::UNKNOWN; - cl_mem memory = nullptr; -}; - -struct GPUImage3DDescriptor -{ - DataType data_type = DataType::UNKNOWN; - AccessType access_type = AccessType::UNKNOWN; - cl_mem memory = nullptr; -}; - -struct GPUImage2DArrayDescriptor -{ - DataType data_type = DataType::UNKNOWN; - AccessType access_type = AccessType::UNKNOWN; - cl_mem memory = nullptr; -}; - -struct GPUImageBufferDescriptor -{ - DataType data_type = DataType::UNKNOWN; - AccessType access_type = AccessType::UNKNOWN; - cl_mem memory = nullptr; -}; - -struct GPUCustomMemoryDescriptor -{ - std::string type_name = ""; - cl_mem memory = nullptr; -}; - -enum class MemoryType -{ - GLOBAL, - CONSTANT, - LOCAL -}; - -std::string MemoryTypeToCLType(MemoryType type); - -struct GPUBufferDescriptor -{ - DataType data_type = DataType::UNKNOWN; - AccessType access_type = AccessType::UNKNOWN; - int element_size = 0; - MemoryType memory_type = MemoryType::GLOBAL; - std::vector<std::string> attributes; - cl_mem memory = nullptr; -}; - -struct GPUResources -{ - std::vector<std::string> ints; - std::vector<std::string> floats; - std::vector<std::pair<std::string, GPUBufferDescriptor>> buffers; - std::vector<std::pair<std::string, GPUImage2DDescriptor>> images2d; - std::vector<std::pair<std::string, GPUImage2DArrayDescriptor>> image2d_arrays; - std::vector<std::pair<std::string, GPUImage3DDescriptor>> images3d; - std::vector<std::pair<std::string, GPUImageBufferDescriptor>> image_buffers; - std::vector<std::pair<std::string, GPUCustomMemoryDescriptor>> custom_memories; - - std::vector<std::string> GetNames() const - { - std::vector<std::string> names = ints; - names.insert(names.end(), floats.begin(), floats.end()); - for (const auto &obj : buffers) - { - names.push_back(obj.first); - } - for (const auto &obj : images2d) - { - names.push_back(obj.first); - } - for (const auto &obj : image2d_arrays) - { - names.push_back(obj.first); - } - for (const auto &obj : images3d) - { - names.push_back(obj.first); - } - for (const auto &obj : image_buffers) - { - names.push_back(obj.first); - } - for (const auto &obj : custom_memories) - { - names.push_back(obj.first); - } - return names; - } -}; - -struct GPUResourcesWithValue -{ - std::vector<std::pair<std::string, int>> ints; - std::vector<std::pair<std::string, float>> floats; - std::vector<std::pair<std::string, cl_mem>> buffers; - std::vector<std::pair<std::string, cl_mem>> images2d; - std::vector<std::pair<std::string, cl_mem>> image2d_arrays; - std::vector<std::pair<std::string, cl_mem>> images3d; - std::vector<std::pair<std::string, cl_mem>> image_buffers; - std::vector<std::pair<std::string, cl_mem>> custom_memories; 
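// Mirrors GPUResources above: the same categories, but each name is now
// paired with its concrete runtime value (an int/float scalar or a bound
// cl_mem handle).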
-}; - -class GPUObject; - -class GPUObjectDescriptor -{ -public: - GPUObjectDescriptor() = default; - GPUObjectDescriptor(const GPUObjectDescriptor &) = default; - GPUObjectDescriptor &operator=(const GPUObjectDescriptor &) = default; - GPUObjectDescriptor(GPUObjectDescriptor &&obj_desc) : state_vars_(std::move(obj_desc.state_vars_)) - { - } - GPUObjectDescriptor &operator=(GPUObjectDescriptor &&obj_desc) - { - if (this != &obj_desc) - { - state_vars_ = std::move(obj_desc.state_vars_); - } - return *this; - } - virtual ~GPUObjectDescriptor() = default; - - void SetStateVar(const std::string &key, const std::string &value) const - { - state_vars_[key] = value; - } - - virtual std::string PerformConstExpr(const std::string &) const { return ""; } - - virtual absl::Status PerformSelector(const std::string &, const std::vector<std::string> &, - const std::vector<std::string> &, std::string *result) const - { - *result = ""; - return absl::OkStatus(); - } - virtual GPUResources GetGPUResources() const { return GPUResources(); } - - virtual absl::Status CreateGPUObject(CLContext *, std::unique_ptr<GPUObject> *) const - { - return absl::OkStatus(); - } - virtual void Release() {} - - void SetAccess(AccessType access_type) { access_type_ = access_type; } - AccessType GetAccess() const { return access_type_; } - -protected: - // friend flatbuffers::Offset<data::GPUObjectDescriptor> Encode( - // const GPUObjectDescriptor& desc, flatbuffers::FlatBufferBuilder* builder); - // friend void Decode(const data::GPUObjectDescriptor* fb_obj, - // GPUObjectDescriptor* obj); - mutable std::map<std::string, std::string> state_vars_; - AccessType access_type_ = AccessType::UNKNOWN; -}; - -using GPUObjectDescriptorPtr = std::unique_ptr<GPUObjectDescriptor>; - -class GPUObject -{ -public: - GPUObject() = default; - // Move only - GPUObject(GPUObject &&obj_desc) = default; - GPUObject &operator=(GPUObject &&obj_desc) = default; - GPUObject(const GPUObject &) = delete; - GPUObject &operator=(const GPUObject &) = delete; - virtual ~GPUObject() = default; - virtual absl::Status GetGPUResources(const GPUObjectDescriptor *obj_ptr, - GPUResourcesWithValue *resources) const = 0; -}; - -using GPUObjectPtr = std::unique_ptr<GPUObject>; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_GPU_OBJECT_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/InferenceContext.cc b/runtime/onert/backend/gpu_cl/open_cl/InferenceContext.cc deleted file mode 100644 index afb7e2950..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/InferenceContext.cc +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "InferenceContext.h" - -#include <algorithm> -#include <cmath> -#include <cstdint> -#include <map> -#include <memory> -#include <string> -#include <vector> -#include <unordered_map> - -#include "Buffer.h" -#include "ClDevice.h" - -#include "kernels/GpuOperation.h" -#include "ModelHints.h" -#include "Precision.h" -#include "StorageTypeUtil.h" -#include "TensorType.h" -#include "DataType.h" -#include "Model.h" -#include "Operations.h" -#include "Shape.h" -#include "Types.h" -#include "Util.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -CLNode::CLNode(CLNode &&node) - : operation(std::move(node.operation)), inputs(std::move(node.inputs)), - outputs(std::move(node.outputs)), name(std::move(node.name)) -{ -} - -CLNode &CLNode::operator=(CLNode &&node) -{ - if (this != &node) - { - operation = std::move(node.operation); - inputs = std::move(node.inputs); - outputs = std::move(node.outputs); - name = std::move(node.name); - } - return *this; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/InferenceContext.h b/runtime/onert/backend/gpu_cl/open_cl/InferenceContext.h deleted file mode 100644 index ebe2c5313..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/InferenceContext.h +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_INFERENCE_CONTEXT_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_INFERENCE_CONTEXT_H__ - -#include <cstdint> -#include <functional> -#include <map> -#include <memory> -#include <vector> -#include <unordered_map> - -#include "Buffer.h" -#include "ClCommandQueue.h" -#include "Environment.h" -#include "GpuObject.h" -#include "kernels/GpuOperation.h" -#include "ModelHints.h" -#include "OpenclWrapper.h" -#include "Precision.h" -#include "TensorType.h" -#include "Model.h" -#include "InternalTensor.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -struct CLNode -{ - std::unique_ptr<GPUOperation> operation; - std::vector<ValueId> inputs; - std::vector<ValueId> outputs; - - // Mostly for debug purposes. 
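// (typically the kernel name of the underlying GPUOperation, kept so that
// profiling and error output stays readable)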
- std::string name;
-
-  CLNode() = default;
-
-  CLNode(CLNode &&node);
-  CLNode &operator=(CLNode &&node);
-  CLNode(const CLNode &) = delete;
-  CLNode &operator=(const CLNode &) = delete;
-};
-
-class InferenceContext
-{
-public:
-  struct CreateInferenceInfo
-  {
-    CalculationsPrecision precision;
-    TensorStorageType storage_type;
-    ModelHints hints;
-  };
-
-  struct DummyTensor
-  {
-    BHWC shape;
-    TensorDescriptor descriptor;
-
-    bool operator==(const DummyTensor &b) const
-    {
-      return shape == b.shape && descriptor == b.descriptor;
-    }
-  };
-
-  class TensorReserver
-  {
-  public:
-    // Take the shared_ptr by non-const value so that std::move below actually
-    // moves; moving from a const value silently copies instead.
-    ValueId Add(std::shared_ptr<DummyTensor> dummy)
-    {
-      reservations_[next_] = std::move(dummy);
-      return next_++;
-    }
-    void Add(ValueId id, std::shared_ptr<DummyTensor> dummy)
-    {
-      reservations_[id] = std::move(dummy);
-    }
-    void SetNext(ValueId id) { next_ = id; }
-    bool HaveTensor(ValueId id) { return reservations_.find(id) != reservations_.end(); }
-    std::shared_ptr<DummyTensor> Get(ValueId id) { return reservations_[id]; }
-
-    std::vector<std::pair<ValueId, TensorDescriptor>> GetTensorDescs() const
-    {
-      std::vector<std::pair<ValueId, TensorDescriptor>> result;
-      for (auto &v : reservations_)
-      {
-        TensorDescriptor desc = v.second->descriptor;
-        desc.shape.b = v.second->shape.b;
-        desc.shape.h = v.second->shape.h;
-        desc.shape.w = v.second->shape.w;
-        desc.shape.d = 1;
-        desc.shape.c = v.second->shape.c;
-        result.push_back({v.first, desc});
-      }
-      return result;
-    }
-
-    void Add(const std::vector<std::pair<ValueId, TensorDescriptor>> &tensors)
-    {
-      for (auto &v : tensors)
-      {
-        auto dummy = std::make_shared<DummyTensor>();
-        dummy->descriptor = v.second;
-        dummy->shape.b = v.second.shape.b;
-        dummy->shape.h = v.second.shape.h;
-        dummy->shape.w = v.second.shape.w;
-        dummy->shape.c = v.second.shape.c;
-        Add(v.first, dummy);
-      }
-    }
-
-  private:
-    std::unordered_map<ValueId, std::shared_ptr<DummyTensor>> reservations_;
-    ValueId next_ = 0;
-  };
-
-private:
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_INFERENCE_CONTEXT_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/InternalTensor.h b/runtime/onert/backend/gpu_cl/open_cl/InternalTensor.h
deleted file mode 100644
index f0423db86..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/InternalTensor.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_INTERNAL_TENSOR_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_INTERNAL_TENSOR_H__
-
-#include <stdint.h>
-
-#include <vector>
-
-#include "DataType.h"
-#include "Shape.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace internal_tensor
-{
-
-// Meta function: given an element type, returns the container type used to
-// hold tensor data.
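// A minimal sketch of how this meta function is used (illustrative only, not
// part of the original header): with the specializations that follow,
//   using F32Storage = internal_tensor::StorageType<DataType::FLOAT32>::value;
//   static_assert(std::is_same<F32Storage, std::vector<float>>::value,
//                 "FLOAT32 tensors are backed by std::vector<float>");
// so InternalTensor<Shape, Type> selects its backing container at compile time
// from the element type alone.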
-template <DataType Type> struct StorageType; - -template <> struct StorageType<DataType::FLOAT32> -{ - using value = std::vector<float>; -}; - -template <> struct StorageType<DataType::INT32> -{ - using value = std::vector<int32_t>; -}; - -} // namespace internal_tensor - -template <typename ShapeT, DataType Type> struct InternalTensor -{ - using ShapeType = ShapeT; - - constexpr static DataType kType = Type; - - using TensorStorageType = typename internal_tensor::StorageType<Type>::value; - - // Opaque id of a tensor. - int64_t id = -1; - - ShapeType shape; - - TensorStorageType data; -}; - -// TensorRef is a reference to another tensor. If an object should never hold -// tensor data, then TensorRef should be used instead. -template <typename ShapeT> struct TensorRef -{ - using ShapeType = ShapeT; - - DataType type = DataType::UNKNOWN; - - ShapeT shape; - - // Opaque reference to a tensor. Upstream component is responsible for - // resolving this reference into an actual tensor. - int64_t ref = -1; - - // Specifies if the tensor should be a variable input tensor that must be an - // output as well as an input to the graph. - bool is_variable_input = false; -}; - -template <typename ShapeT, DataType Type> constexpr DataType InternalTensor<ShapeT, Type>::kType; - -template <typename ShapeT, DataType Type> -InternalTensor<ShapeT, Type> MakeZeroTensor(const ShapeT &shape) -{ - InternalTensor<ShapeT, Type> tensor; - tensor.shape = shape; - tensor.data = - typename InternalTensor<ShapeT, Type>::TensorStorageType(shape.DimensionsProduct(), 0); - return tensor; -} - -using TensorFloat32 = InternalTensor<BHWC, DataType::FLOAT32>; -using Tensor5DFloat32 = InternalTensor<BHWDC, DataType::FLOAT32>; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_INTERNAL_TENSOR_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/LinearStorage.cc b/runtime/onert/backend/gpu_cl/open_cl/LinearStorage.cc deleted file mode 100644 index 3889d4369..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/LinearStorage.cc +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */
-
-#include "LinearStorage.h"
-
-#include "absl/strings/str_cat.h"
-#include "DataType.h"
-#include "Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-TensorLinearDescriptor::TensorLinearDescriptor(TensorLinearDescriptor &&desc)
-  : GPUObjectDescriptor(std::move(desc)), storage_type(desc.storage_type),
-    element_type(desc.element_type), memory_type(desc.memory_type), size(desc.size),
-    data(std::move(desc.data))
-{
-}
-
-TensorLinearDescriptor &TensorLinearDescriptor::operator=(TensorLinearDescriptor &&desc)
-{
-  if (this != &desc)
-  {
-    std::swap(storage_type, desc.storage_type);
-    std::swap(element_type, desc.element_type);
-    std::swap(memory_type, desc.memory_type);
-    std::swap(size, desc.size);
-    data = std::move(desc.data);
-    GPUObjectDescriptor::operator=(std::move(desc));
-  }
-  return *this;
-}
-
-void TensorLinearDescriptor::Release() { data.clear(); }
-
-GPUResources TensorLinearDescriptor::GetGPUResources() const
-{
-  GPUResources resources;
-  resources.ints.push_back("length");
-  if (storage_type == LinearStorageType::BUFFER)
-  {
-    GPUBufferDescriptor desc;
-    desc.data_type = element_type;
-    desc.access_type = access_type_;
-    desc.element_size = 4;
-    desc.memory_type = memory_type;
-    resources.buffers.push_back({"buffer", desc});
-  }
-  else
-  {
-    GPUImage2DDescriptor desc;
-    desc.data_type = element_type;
-    desc.access_type = access_type_;
-    resources.images2d.push_back({"tex2d", desc});
-  }
-  return resources;
-}
-
-absl::Status TensorLinearDescriptor::PerformSelector(const std::string &selector,
-                                                     const std::vector<std::string> &args,
-                                                     const std::vector<std::string> &,
-                                                     std::string *result) const
-{
-  if (selector == "Length")
-  {
-    *result = "length";
-    return absl::OkStatus();
-  }
-  else if (selector == "Read")
-  {
-    return PerformReadSelector(args, result);
-  }
-  else if (selector == "GetPtr")
-  {
-    if (storage_type != LinearStorageType::BUFFER)
-    {
-      return absl::InvalidArgumentError(
-        "GetPtr selector supported for LinearStorageType::BUFFER only.");
-    }
-    *result = "buffer";
-    return absl::OkStatus();
-  }
-  else
-  {
-    return absl::NotFoundError(
-      absl::StrCat("TensorLinearDescriptor has no selector with name - ", selector));
-  }
-}
-
-absl::Status TensorLinearDescriptor::PerformReadSelector(const std::vector<std::string> &args,
-                                                         std::string *result) const
-{
-  if (args.size() != 1)
-  {
-    return absl::NotFoundError(absl::StrCat(
-      "TensorLinearDescriptor Read requires one argument, but ", args.size(), " were passed"));
-  }
-  if (storage_type == LinearStorageType::BUFFER)
-  {
-    *result = absl::StrCat("buffer[", args[0], "]");
-    return absl::OkStatus();
-  }
-  else
-  {
-    const std::string read = element_type == DataType::FLOAT16 ? "read_imageh" : "read_imagef";
-    *result = absl::StrCat(read, "(tex2d, smp_none, (int2)(", args[0], ", 0))");
-    return absl::OkStatus();
-  }
-}
-
-absl::Status TensorLinearDescriptor::CreateGPUObject(CLContext *context, GPUObjectPtr *result) const
-{
-  LinearStorage gpu_storage;
-  RETURN_IF_ERROR(gpu_storage.CreateFromTensorLinearDescriptor(*this, context));
-  *result = absl::make_unique<LinearStorage>(std::move(gpu_storage));
-  return absl::OkStatus();
-}
-
-void TensorLinearDescriptor::UploadLinearData(const InternalTensor<Linear, DataType::FLOAT32> &src,
-                                              int aligned_size)
-{
-  size = aligned_size == 0 ? DivideRoundUp(src.shape.v, 4) : aligned_size;
-  if (element_type == DataType::FLOAT32)
-  {
-    data.resize(size * sizeof(float) * 4);
-    float *gpu_data = reinterpret_cast<float *>(data.data());
-    for (int i = 0; i < size * 4; ++i)
-    {
-      if (i < src.shape.v)
-      {
-        gpu_data[i] = src.data[i];
-      }
-      else
-      {
-        gpu_data[i] = 0.0f;
-      }
-    }
-  }
-  // TODO: FLOAT16 is not supported yet. A sketch of the missing branch:
-  //
-  // else {
-  //   data.resize(size * sizeof(half) * 4);
-  //   half* gpu_data = reinterpret_cast<half*>(data.data());
-  //   for (int i = 0; i < size * 4; ++i) {
-  //     if (i < src.shape.v) {
-  //       gpu_data[i] = src.data[i];
-  //     } else {
-  //       gpu_data[i] = 0.0f;
-  //     }
-  //   }
-  // }
-}
-
-void LinearStorage::Release()
-{
-  if (memory_)
-  {
-    clReleaseMemObject(memory_);
-    memory_ = nullptr;
-  }
-}
-
-LinearStorage::LinearStorage(LinearStorage &&storage)
-  : GPUObject(std::move(storage)), memory_(storage.memory_), depth_(storage.depth_),
-    storage_type_(storage.storage_type_)
-{
-  storage.memory_ = nullptr;
-}
-
-LinearStorage &LinearStorage::operator=(LinearStorage &&storage)
-{
-  if (this != &storage)
-  {
-    Release();
-    std::swap(memory_, storage.memory_);
-    std::swap(depth_, storage.depth_);
-    std::swap(storage_type_, storage.storage_type_);
-    GPUObject::operator=(std::move(storage));
-  }
-  return *this;
-}
-
-absl::Status LinearStorage::GetGPUResources(const GPUObjectDescriptor *obj_ptr,
-                                            GPUResourcesWithValue *resources) const
-{
-  const auto *linear_desc = dynamic_cast<const TensorLinearDescriptor *>(obj_ptr);
-  if (!linear_desc)
-  {
-    return absl::InvalidArgumentError("Expected TensorLinearDescriptor on input.");
-  }
-
-  resources->ints.push_back({"length", depth_});
-
-  if (storage_type_ == LinearStorageType::BUFFER)
-  {
-    resources->buffers.push_back({"buffer", memory_});
-  }
-  else
-  {
-    resources->images2d.push_back({"tex2d", memory_});
-  }
-
-  return absl::OkStatus();
-}
-
-absl::Status LinearStorage::CreateFromTensorLinearDescriptor(const TensorLinearDescriptor &desc,
-                                                             CLContext *context)
-{
-  storage_type_ = desc.storage_type;
-  depth_ = desc.size;
-  uint8_t *data_ptr = desc.data.empty() ? nullptr : const_cast<unsigned char *>(desc.data.data());
-  if (storage_type_ == LinearStorageType::BUFFER)
-  {
-    bool read_only = desc.memory_type == MemoryType::CONSTANT;
-    // TODO: FLOAT16 is not supported yet. A sketch of the missing case:
-    //
-    // const int float4_size = desc.element_type == DataType::FLOAT32
-    //                           ? sizeof(float) * 4
-    //                           : sizeof(half) * 4;
-    const int float4_size = sizeof(float) * 4;
-    return CreateCLBuffer(context->context(), depth_ * float4_size, read_only, data_ptr, &memory_);
-  }
-  else
-  {
-    return CreateRGBAImage2D(context->context(), depth_, 1,
-                             DataTypeToChannelType(desc.element_type), data_ptr, &memory_);
-  }
-}
-
-LinearStorageType DeduceLinearStorageType(TensorStorageType tensor_storage_type)
-{
-  if (tensor_storage_type == TensorStorageType::BUFFER)
-  {
-    return LinearStorageType::BUFFER;
-  }
-  else
-  {
-    return LinearStorageType::TEXTURE_2D;
-  }
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/LinearStorage.h b/runtime/onert/backend/gpu_cl/open_cl/LinearStorage.h
deleted file mode 100644
index f6c3ac82f..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/LinearStorage.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd.
All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_LINEAR_STORAGE_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_LINEAR_STORAGE_H__ - -#include <string> -#include <utility> - -#include "absl/strings/str_cat.h" -#include "absl/types/span.h" -#include "GpuObject.h" -#include "OpenclWrapper.h" -#include "TensorType.h" -#include "Util.h" -#include "DataType.h" -#include "Status.h" -#include "Types.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -enum class LinearStorageType -{ - BUFFER, - TEXTURE_2D -}; - -struct TensorLinearDescriptor : public GPUObjectDescriptor -{ - LinearStorageType storage_type; - DataType element_type; // FLOAT32 or FLOAT16 - MemoryType memory_type = MemoryType::GLOBAL; // applicable for BUFFER - - // optional - int size = 0; - std::vector<uint8_t> data; - - TensorLinearDescriptor() = default; - TensorLinearDescriptor(const TensorLinearDescriptor &) = default; - TensorLinearDescriptor &operator=(const TensorLinearDescriptor &) = default; - TensorLinearDescriptor(TensorLinearDescriptor &&desc); - TensorLinearDescriptor &operator=(TensorLinearDescriptor &&desc); - - void UploadLinearData(const InternalTensor<Linear, DataType::FLOAT32> &src, int aligned_size = 0); - - absl::Status PerformSelector(const std::string &selector, const std::vector<std::string> &args, - const std::vector<std::string> &template_args, - std::string *result) const override; - - GPUResources GetGPUResources() const override; - absl::Status PerformReadSelector(const std::vector<std::string> &args, std::string *result) const; - - absl::Status CreateGPUObject(CLContext *context, GPUObjectPtr *result) const override; - void Release() override; -}; - -LinearStorageType DeduceLinearStorageType(TensorStorageType tensor_storage_type); - -// Represent GPU 1D-array of FLT4(float4/half4) values -// Can use inside texture2d or buffer -class LinearStorage : public GPUObject -{ -public: - LinearStorage() {} - ~LinearStorage() override { Release(); } - - // Move only - LinearStorage(LinearStorage &&storage); - LinearStorage &operator=(LinearStorage &&storage); - LinearStorage(const LinearStorage &) = delete; - LinearStorage &operator=(const LinearStorage &) = delete; - - absl::Status GetGPUResources(const GPUObjectDescriptor *obj_ptr, - GPUResourcesWithValue *resources) const override; - - absl::Status CreateFromTensorLinearDescriptor(const TensorLinearDescriptor &desc, - CLContext *context); - -private: - void Release(); - - cl_mem memory_ = nullptr; - int depth_; - LinearStorageType storage_type_; -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_LINEAR_STORAGE_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/Model.h b/runtime/onert/backend/gpu_cl/open_cl/Model.h deleted file mode 100644 index f434bb22f..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Model.h +++ /dev/null @@ -1,56 +0,0 
@@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_MODEL_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_MODEL_H__
-
-#include <string>
-
-#include "absl/types/any.h"
-#include "InternalTensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// This is yet another representation of a CNN graph; its primary purpose is to
-// simplify graph manipulation.
-
-using ValueId = uint32_t;
-
-// Used to emulate quantized behavior.
-struct QuantizationParams
-{
-  float min = 0;
-  float max = 0;
-  float scale = 0;
-};
-
-struct Operation
-{
-  std::string type;
-  absl::any attributes;
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_MODEL_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ModelHints.h b/runtime/onert/backend/gpu_cl/open_cl/ModelHints.h
deleted file mode 100644
index 474c56b2a..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/ModelHints.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_MODEL_HINTS_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_MODEL_HINTS_H__
-
-#include <cstdint>
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-struct ModelHints
-{
-  using ModelHint = uint64_t;
-
-  // By default we want the fastest inference.
-  static constexpr ModelHint kFastestInference = 0x00000000;
-  // Can improve compilation time, but inference can be slower.
-  static constexpr ModelHint kReduceKernelsCount = 0x00000001;
-  // Can improve tuning time, but inference can be slower.
-  static constexpr ModelHint kFastTuning = 0x00000002;
-
-  // Experimental.
-  // Can improve performance and memory consumption, but can slow down
-  // initialization a lot and create more kernels.
- static constexpr ModelHint kAllowSpecialKernels = 0x00000004; - - void Add(ModelHint hint) - { - if (hint == kFastestInference) - { - hints = kFastestInference; - } - else - { - hints |= hint; - } - } - - bool Check(ModelHint hint) const { return hints & hint; } - - uint64_t hints = kFastestInference; -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_MODEL_HINTS_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/OpenclWrapper.cc b/runtime/onert/backend/gpu_cl/open_cl/OpenclWrapper.cc deleted file mode 100644 index dbaf6faf6..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/OpenclWrapper.cc +++ /dev/null @@ -1,407 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#if defined(_WIN32) -#define __WINDOWS__ -#endif - -#include "OpenclWrapper.h" - -#ifdef __WINDOWS__ -#include <windows.h> -#else -#include <dlfcn.h> -#endif - -#include <string> - -#include "absl/strings/str_cat.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -#ifdef __ANDROID__ -#define LoadFunction(function) \ - if (use_wrapper) \ - { \ - function = reinterpret_cast<PFN_##function>(loadOpenCLPointer(#function)); \ - } \ - else \ - { \ - function = reinterpret_cast<PFN_##function>(dlsym(*libopencl, #function)); \ - } -#elif defined(__WINDOWS__) -#define LoadFunction(function) \ - function = reinterpret_cast<PFN_##function>(GetProcAddress(libopencl, #function)); -#else -#define LoadFunction(function) \ - function = reinterpret_cast<PFN_##function>(dlsym(*libopencl, #function)); -#endif - -#ifdef __WINDOWS__ -void LoadOpenCLFunctions(HMODULE libopencl); -#else -void LoadOpenCLFunctions(void **libopencl, bool use_wrapper); -#endif - -absl::Status LoadOpenCL(void **libopencl) -{ -#ifdef __WINDOWS__ - HMODULE libopencl = LoadLibraryA("OpenCL.dll"); - if (libopencl) - { - LoadOpenCLFunctions(libopencl); - return absl::OkStatus(); - } - else - { - DWORD error_code = GetLastError(); - return absl::UnknownError( - absl::StrCat("Can not open OpenCL library on this device, error code - ", error_code)); - } -#else - *libopencl = dlopen("libOpenCL.so", RTLD_NOW | RTLD_LOCAL); - if (*libopencl) - { - LoadOpenCLFunctions(libopencl, false); - return absl::OkStatus(); - } - // record error - std::string error(dlerror()); -#ifdef __ANDROID__ - // Pixel phone or auto? 
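// A minimal sketch (illustrative, not from this file) of the probing pattern
// the fallback below hand-unrolls: try vendor-specific loader names in order
// and stop at the first one that dlopen() accepts, e.g.
//   for (const char *so : {"libOpenCL-pixel.so", "libOpenCL-car.so"})
//     if ((*libopencl = dlopen(so, RTLD_NOW | RTLD_LOCAL))) break;
// Pixel-style wrappers also export an enableOpenCL() entry point that must be
// called before symbols are resolved, which is why the wrapper path resolves
// functions through loadOpenCLPointer() instead of plain dlsym().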
- *libopencl = dlopen("libOpenCL-pixel.so", RTLD_NOW | RTLD_LOCAL); - if (!*libopencl) - { - *libopencl = dlopen("libOpenCL-car.so", RTLD_NOW | RTLD_LOCAL); - } - if (*libopencl) - { - typedef void (*enableOpenCL_t)(); - enableOpenCL_t enableOpenCL = - reinterpret_cast<enableOpenCL_t>(dlsym(*libopencl, "enableOpenCL")); - enableOpenCL(); - LoadOpenCLFunctions(libopencl, true); - return absl::OkStatus(); - } -#endif - return absl::UnknownError(absl::StrCat("Can not open OpenCL library on this device - ", error)); -#endif -} - -void UnloadOpenCL(void *libopencl) -{ - if (libopencl) - { - dlclose(libopencl); - } -} - -#ifdef __WINDOWS__ -void LoadOpenCLFunctions(HMODULE libopencl) -{ -#else -#ifdef __ANDROID__ -void LoadOpenCLFunctions(void **libopencl, bool use_wrapper) -{ - typedef void *(*loadOpenCLPointer_t)(const char *name); - loadOpenCLPointer_t loadOpenCLPointer; - if (use_wrapper) - { - loadOpenCLPointer = - reinterpret_cast<loadOpenCLPointer_t>(dlsym(*libopencl, "loadOpenCLPointer")); - } -#else -void LoadOpenCLFunctions(void **libopencl, bool) -{ -#endif // __ANDROID__ -#endif // __WINDOWS__ - - LoadFunction(clGetPlatformIDs); - LoadFunction(clGetPlatformInfo); - LoadFunction(clGetDeviceIDs); - LoadFunction(clGetDeviceInfo); - LoadFunction(clCreateSubDevices); - LoadFunction(clRetainDevice); - LoadFunction(clReleaseDevice); - LoadFunction(clCreateContext); - LoadFunction(clCreateContextFromType); - LoadFunction(clRetainContext); - LoadFunction(clReleaseContext); - LoadFunction(clGetContextInfo); - LoadFunction(clCreateCommandQueueWithProperties); - LoadFunction(clRetainCommandQueue); - LoadFunction(clReleaseCommandQueue); - LoadFunction(clGetCommandQueueInfo); - LoadFunction(clCreateBuffer); - LoadFunction(clCreateSubBuffer); - LoadFunction(clCreateImage); - LoadFunction(clCreatePipe); - LoadFunction(clRetainMemObject); - LoadFunction(clReleaseMemObject); - LoadFunction(clGetSupportedImageFormats); - LoadFunction(clGetMemObjectInfo); - LoadFunction(clGetImageInfo); - LoadFunction(clGetPipeInfo); - LoadFunction(clSetMemObjectDestructorCallback); - LoadFunction(clSVMAlloc); - LoadFunction(clSVMFree); - LoadFunction(clCreateSamplerWithProperties); - LoadFunction(clRetainSampler); - LoadFunction(clReleaseSampler); - LoadFunction(clGetSamplerInfo); - LoadFunction(clCreateProgramWithSource); - LoadFunction(clCreateProgramWithBinary); - LoadFunction(clCreateProgramWithBuiltInKernels); - LoadFunction(clRetainProgram); - LoadFunction(clReleaseProgram); - LoadFunction(clBuildProgram); - LoadFunction(clCompileProgram); - LoadFunction(clLinkProgram); - LoadFunction(clUnloadPlatformCompiler); - LoadFunction(clGetProgramInfo); - LoadFunction(clGetProgramBuildInfo); - LoadFunction(clCreateKernel); - LoadFunction(clCreateKernelsInProgram); - LoadFunction(clRetainKernel); - LoadFunction(clReleaseKernel); - LoadFunction(clSetKernelArg); - LoadFunction(clSetKernelArgSVMPointer); - LoadFunction(clSetKernelExecInfo); - LoadFunction(clGetKernelInfo); - LoadFunction(clGetKernelArgInfo); - LoadFunction(clGetKernelWorkGroupInfo); - LoadFunction(clWaitForEvents); - LoadFunction(clGetEventInfo); - LoadFunction(clCreateUserEvent); - LoadFunction(clRetainEvent); - LoadFunction(clReleaseEvent); - LoadFunction(clSetUserEventStatus); - LoadFunction(clSetEventCallback); - LoadFunction(clGetEventProfilingInfo); - LoadFunction(clFlush); - LoadFunction(clFinish); - LoadFunction(clEnqueueReadBuffer); - LoadFunction(clEnqueueReadBufferRect); - LoadFunction(clEnqueueWriteBuffer); - 
LoadFunction(clEnqueueWriteBufferRect);
-  LoadFunction(clEnqueueFillBuffer);
-  LoadFunction(clEnqueueCopyBuffer);
-  LoadFunction(clEnqueueCopyBufferRect);
-  LoadFunction(clEnqueueReadImage);
-  LoadFunction(clEnqueueWriteImage);
-  LoadFunction(clEnqueueFillImage);
-  LoadFunction(clEnqueueCopyImage);
-  LoadFunction(clEnqueueCopyImageToBuffer);
-  LoadFunction(clEnqueueCopyBufferToImage);
-  LoadFunction(clEnqueueMapBuffer);
-  LoadFunction(clEnqueueMapImage);
-  LoadFunction(clEnqueueUnmapMemObject);
-  LoadFunction(clEnqueueMigrateMemObjects);
-  LoadFunction(clEnqueueNDRangeKernel);
-  LoadFunction(clEnqueueNativeKernel);
-  LoadFunction(clEnqueueMarkerWithWaitList);
-  LoadFunction(clEnqueueBarrierWithWaitList);
-  LoadFunction(clEnqueueSVMFree);
-  LoadFunction(clEnqueueSVMMemcpy);
-  LoadFunction(clEnqueueSVMMemFill);
-  LoadFunction(clEnqueueSVMMap);
-  LoadFunction(clEnqueueSVMUnmap);
-  LoadFunction(clGetExtensionFunctionAddressForPlatform);
-  LoadFunction(clCreateImage2D);
-  LoadFunction(clCreateImage3D);
-  LoadFunction(clEnqueueMarker);
-  LoadFunction(clEnqueueWaitForEvents);
-  LoadFunction(clEnqueueBarrier);
-  LoadFunction(clUnloadCompiler);
-  LoadFunction(clGetExtensionFunctionAddress);
-  LoadFunction(clCreateCommandQueue);
-  LoadFunction(clCreateSampler);
-  LoadFunction(clEnqueueTask);
-
-  // OpenGL sharing
-  LoadFunction(clCreateFromGLBuffer);
-  LoadFunction(clCreateFromGLTexture);
-  LoadFunction(clEnqueueAcquireGLObjects);
-  LoadFunction(clEnqueueReleaseGLObjects);
-
-  // cl_khr_egl_event extension
-  LoadFunction(clCreateEventFromEGLSyncKHR);
-
-  // EGL sharing
-  LoadFunction(clCreateFromEGLImageKHR);
-  LoadFunction(clEnqueueAcquireEGLObjectsKHR);
-  LoadFunction(clEnqueueReleaseEGLObjectsKHR);
-} // LoadOpenCLFunctions
-
-// Definitions of the global function pointers declared in OpenclWrapper.h.
-// They stay null (no OpenCL support) until LoadOpenCLFunctions resolves them.
-PFN_clGetPlatformIDs clGetPlatformIDs;
-PFN_clGetPlatformInfo clGetPlatformInfo;
-PFN_clGetDeviceIDs clGetDeviceIDs;
-PFN_clGetDeviceInfo clGetDeviceInfo;
-PFN_clCreateSubDevices clCreateSubDevices;
-PFN_clRetainDevice clRetainDevice;
-PFN_clReleaseDevice clReleaseDevice;
-PFN_clCreateContext clCreateContext;
-PFN_clCreateContextFromType clCreateContextFromType;
-PFN_clRetainContext clRetainContext;
-PFN_clReleaseContext clReleaseContext;
-PFN_clGetContextInfo clGetContextInfo;
-PFN_clCreateCommandQueueWithProperties clCreateCommandQueueWithProperties;
-PFN_clRetainCommandQueue clRetainCommandQueue;
-PFN_clReleaseCommandQueue clReleaseCommandQueue;
-PFN_clGetCommandQueueInfo clGetCommandQueueInfo;
-PFN_clCreateBuffer clCreateBuffer;
-PFN_clCreateSubBuffer clCreateSubBuffer;
-PFN_clCreateImage clCreateImage;
-PFN_clCreatePipe clCreatePipe;
-PFN_clRetainMemObject clRetainMemObject;
-PFN_clReleaseMemObject clReleaseMemObject;
-PFN_clGetSupportedImageFormats clGetSupportedImageFormats;
-PFN_clGetMemObjectInfo clGetMemObjectInfo;
-PFN_clGetImageInfo clGetImageInfo;
-PFN_clGetPipeInfo clGetPipeInfo;
-PFN_clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback;
-PFN_clSVMAlloc clSVMAlloc;
-PFN_clSVMFree clSVMFree;
-PFN_clCreateSamplerWithProperties clCreateSamplerWithProperties;
-PFN_clRetainSampler clRetainSampler;
-PFN_clReleaseSampler clReleaseSampler;
-PFN_clGetSamplerInfo clGetSamplerInfo;
-PFN_clCreateProgramWithSource clCreateProgramWithSource;
-PFN_clCreateProgramWithBinary clCreateProgramWithBinary;
-PFN_clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels;
-PFN_clRetainProgram clRetainProgram;
-PFN_clReleaseProgram clReleaseProgram;
-PFN_clBuildProgram clBuildProgram;
-PFN_clCompileProgram
clCompileProgram; -PFN_clLinkProgram clLinkProgram; -PFN_clUnloadPlatformCompiler clUnloadPlatformCompiler; -PFN_clGetProgramInfo clGetProgramInfo; -PFN_clGetProgramBuildInfo clGetProgramBuildInfo; -PFN_clCreateKernel clCreateKernel; -PFN_clCreateKernelsInProgram clCreateKernelsInProgram; -PFN_clRetainKernel clRetainKernel; -PFN_clReleaseKernel clReleaseKernel; -PFN_clSetKernelArg clSetKernelArg; -PFN_clSetKernelArgSVMPointer clSetKernelArgSVMPointer; -PFN_clSetKernelExecInfo clSetKernelExecInfo; -PFN_clGetKernelInfo clGetKernelInfo; -PFN_clGetKernelArgInfo clGetKernelArgInfo; -PFN_clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo; -PFN_clWaitForEvents clWaitForEvents; -PFN_clGetEventInfo clGetEventInfo; -PFN_clCreateUserEvent clCreateUserEvent; -PFN_clRetainEvent clRetainEvent; -PFN_clReleaseEvent clReleaseEvent; -PFN_clSetUserEventStatus clSetUserEventStatus; -PFN_clSetEventCallback clSetEventCallback; -PFN_clGetEventProfilingInfo clGetEventProfilingInfo; -PFN_clFlush clFlush; -PFN_clFinish clFinish; -PFN_clEnqueueReadBuffer clEnqueueReadBuffer; -PFN_clEnqueueReadBufferRect clEnqueueReadBufferRect; -PFN_clEnqueueWriteBuffer clEnqueueWriteBuffer; -PFN_clEnqueueWriteBufferRect clEnqueueWriteBufferRect; -PFN_clEnqueueFillBuffer clEnqueueFillBuffer; -PFN_clEnqueueCopyBuffer clEnqueueCopyBuffer; -PFN_clEnqueueCopyBufferRect clEnqueueCopyBufferRect; -PFN_clEnqueueReadImage clEnqueueReadImage; -PFN_clEnqueueWriteImage clEnqueueWriteImage; -PFN_clEnqueueFillImage clEnqueueFillImage; -PFN_clEnqueueCopyImage clEnqueueCopyImage; -PFN_clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer; -PFN_clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage; -PFN_clEnqueueMapBuffer clEnqueueMapBuffer; -PFN_clEnqueueMapImage clEnqueueMapImage; -PFN_clEnqueueUnmapMemObject clEnqueueUnmapMemObject; -PFN_clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects; -PFN_clEnqueueNDRangeKernel clEnqueueNDRangeKernel; -PFN_clEnqueueNativeKernel clEnqueueNativeKernel; -PFN_clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList; -PFN_clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList; -PFN_clEnqueueSVMFree clEnqueueSVMFree; -PFN_clEnqueueSVMMemcpy clEnqueueSVMMemcpy; -PFN_clEnqueueSVMMemFill clEnqueueSVMMemFill; -PFN_clEnqueueSVMMap clEnqueueSVMMap; -PFN_clEnqueueSVMUnmap clEnqueueSVMUnmap; -PFN_clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform; -PFN_clCreateImage2D clCreateImage2D; -PFN_clCreateImage3D clCreateImage3D; -PFN_clEnqueueMarker clEnqueueMarker; -PFN_clEnqueueWaitForEvents clEnqueueWaitForEvents; -PFN_clEnqueueBarrier clEnqueueBarrier; -PFN_clUnloadCompiler clUnloadCompiler; -PFN_clGetExtensionFunctionAddress clGetExtensionFunctionAddress; -PFN_clCreateCommandQueue clCreateCommandQueue; -PFN_clCreateSampler clCreateSampler; -PFN_clEnqueueTask clEnqueueTask; - -// OpenGL sharing -PFN_clCreateFromGLBuffer clCreateFromGLBuffer; -PFN_clCreateFromGLTexture clCreateFromGLTexture; -PFN_clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects; -PFN_clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects; - -// cl_khr_egl_event extension -PFN_clCreateEventFromEGLSyncKHR clCreateEventFromEGLSyncKHR; - -// EGL sharing -PFN_clCreateFromEGLImageKHR clCreateFromEGLImageKHR; -PFN_clEnqueueAcquireEGLObjectsKHR clEnqueueAcquireEGLObjectsKHR; -PFN_clEnqueueReleaseEGLObjectsKHR clEnqueueReleaseEGLObjectsKHR; - -cl_mem CreateImage2DLegacy(cl_context context, cl_mem_flags flags, - const cl_image_format *image_format, const cl_image_desc *image_desc, - void *host_ptr, cl_int *errcode_ret) -{ - if 
(clCreateImage) - { // clCreateImage available since OpenCL 1.2 - return clCreateImage(context, flags, image_format, image_desc, host_ptr, errcode_ret); - } - else - { - return clCreateImage2D(context, flags, image_format, image_desc->image_width, - image_desc->image_height, image_desc->image_row_pitch, host_ptr, - errcode_ret); - } -} - -cl_mem CreateImage3DLegacy(cl_context context, cl_mem_flags flags, - const cl_image_format *image_format, const cl_image_desc *image_desc, - void *host_ptr, cl_int *errcode_ret) -{ - if (clCreateImage) - { // clCreateImage available since OpenCL 1.2 - return clCreateImage(context, flags, image_format, image_desc, host_ptr, errcode_ret); - } - else - { - return clCreateImage3D(context, flags, image_format, image_desc->image_width, - image_desc->image_height, image_desc->image_depth, - image_desc->image_row_pitch, image_desc->image_slice_pitch, host_ptr, - errcode_ret); - } -} -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/OpenclWrapper.h b/runtime/onert/backend/gpu_cl/open_cl/OpenclWrapper.h deleted file mode 100644 index 021f8735a..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/OpenclWrapper.h +++ /dev/null @@ -1,560 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_WRAPPERE_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_WRAPPERE_H__ - -#include "CL/cl.h" -#include "CL/cl_egl.h" -#include "CL/cl_ext.h" -#include "CL/cl_gl.h" -#include "CL/cl_platform.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -absl::Status LoadOpenCL(void **libopencl); -void UnloadOpenCL(void *libopencl); - -typedef cl_int(CL_API_CALL *PFN_clGetPlatformIDs)( - cl_uint /* num_entries */, cl_platform_id * /* platforms */, - cl_uint * /* num_platforms */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clGetPlatformInfo)( - cl_platform_id /* platform */, cl_platform_info /* param_name */, size_t /* param_value_size */, - void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clGetDeviceIDs)( - cl_platform_id /* platform */, cl_device_type /* device_type */, cl_uint /* num_entries */, - cl_device_id * /* devices */, cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clGetDeviceInfo)( - cl_device_id /* device */, cl_device_info /* param_name */, size_t /* param_value_size */, - void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clCreateSubDevices)( - cl_device_id /* in_device */, const cl_device_partition_property * /* properties */, - cl_uint /* num_devices */, cl_device_id * /* out_devices */, - cl_uint * /* num_devices_ret */) CL_API_SUFFIX__VERSION_1_2; -typedef cl_int(CL_API_CALL *PFN_clRetainDevice)(cl_device_id /* device */) - CL_API_SUFFIX__VERSION_1_2; -typedef cl_int(CL_API_CALL *PFN_clReleaseDevice)(cl_device_id /* device */) - CL_API_SUFFIX__VERSION_1_2; -typedef cl_context(CL_API_CALL *PFN_clCreateContext)( - const cl_context_properties * /* properties */, cl_uint /* num_devices */, - const cl_device_id * /* devices */, - void(CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *), - void * /* user_data */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_context(CL_API_CALL *PFN_clCreateContextFromType)( - const cl_context_properties * /* properties */, cl_device_type /* device_type */, - void(CL_CALLBACK * /* pfn_notify*/)(const char *, const void *, size_t, void *), - void * /* user_data */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clRetainContext)(cl_context /* context */) - CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clReleaseContext)(cl_context /* context */) - CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clGetContextInfo)( - cl_context /* context */, cl_context_info /* param_name */, size_t /* param_value_size */, - void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_command_queue(CL_API_CALL *PFN_clCreateCommandQueueWithProperties)( - cl_context /* context */, cl_device_id /* device */, const cl_queue_properties * /* properties */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; -typedef cl_int(CL_API_CALL *PFN_clRetainCommandQueue)(cl_command_queue /* command_queue */) - CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clReleaseCommandQueue)(cl_command_queue /* command_queue */) - CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clGetCommandQueueInfo)( - cl_command_queue /* command_queue */, cl_command_queue_info /* param_name */, - size_t /* param_value_size */, void * /* param_value 
*/, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_mem(CL_API_CALL *PFN_clCreateBuffer)( - cl_context /* context */, cl_mem_flags /* flags */, size_t /* size */, void * /* host_ptr */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_mem(CL_API_CALL *PFN_clCreateSubBuffer)( - cl_mem /* buffer */, cl_mem_flags /* flags */, cl_buffer_create_type /* buffer_create_type */, - const void * /* buffer_create_info */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; -typedef cl_mem(CL_API_CALL *PFN_clCreateImage)( - cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format * /* image_format */, - const cl_image_desc * /* image_desc */, void * /* host_ptr */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; -typedef cl_mem(CL_API_CALL *PFN_clCreatePipe)( - cl_context /* context */, cl_mem_flags /* flags */, cl_uint /* pipe_packet_size */, - cl_uint /* pipe_max_packets */, const cl_pipe_properties * /* properties */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; -typedef cl_int(CL_API_CALL *PFN_clRetainMemObject)(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clReleaseMemObject)(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clGetSupportedImageFormats)( - cl_context /* context */, cl_mem_flags /* flags */, cl_mem_object_type /* image_type */, - cl_uint /* num_entries */, cl_image_format * /* image_formats */, - cl_uint * /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clGetMemObjectInfo)( - cl_mem /* memobj */, cl_mem_info /* param_name */, size_t /* param_value_size */, - void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clGetImageInfo)( - cl_mem /* image */, cl_image_info /* param_name */, size_t /* param_value_size */, - void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clGetPipeInfo)( - cl_mem /* pipe */, cl_pipe_info /* param_name */, size_t /* param_value_size */, - void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_2_0; -typedef cl_int(CL_API_CALL *PFN_clSetMemObjectDestructorCallback)( - cl_mem /* memobj */, - void(CL_CALLBACK * /*pfn_notify*/)(cl_mem /* memobj */, void * /*user_data*/), - void * /*user_data */) CL_API_SUFFIX__VERSION_1_1; -typedef void *(CL_API_CALL *PFN_clSVMAlloc)(cl_context /* context */, cl_svm_mem_flags /* flags */, - size_t /* size */, - cl_uint /* alignment */)CL_API_SUFFIX__VERSION_2_0; -typedef void(CL_API_CALL *PFN_clSVMFree)(cl_context /* context */, - void * /* svm_pointer */) CL_API_SUFFIX__VERSION_2_0; -typedef cl_sampler(CL_API_CALL *PFN_clCreateSamplerWithProperties)( - cl_context /* context */, const cl_sampler_properties * /* normalized_coords */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; -typedef cl_int(CL_API_CALL *PFN_clRetainSampler)(cl_sampler /* sampler */) - CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clReleaseSampler)(cl_sampler /* sampler */) - CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clGetSamplerInfo)( - cl_sampler /* sampler */, cl_sampler_info /* param_name */, size_t /* param_value_size */, - void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_program(CL_API_CALL *PFN_clCreateProgramWithSource)( - cl_context /* context */, cl_uint /* count */, const char ** /* 
strings */, - const size_t * /* lengths */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_program(CL_API_CALL *PFN_clCreateProgramWithBinary)( - cl_context /* context */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, - const size_t * /* lengths */, const unsigned char ** /* binaries */, cl_int * /* binary_status */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_program(CL_API_CALL *PFN_clCreateProgramWithBuiltInKernels)( - cl_context /* context */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, - const char * /* kernel_names */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; -typedef cl_int(CL_API_CALL *PFN_clRetainProgram)(cl_program /* program */) - CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clReleaseProgram)(cl_program /* program */) - CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clBuildProgram)( - cl_program /* program */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, - const char * /* options */, - void(CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), - void * /* user_data */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clCompileProgram)( - cl_program /* program */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, - const char * /* options */, cl_uint /* num_input_headers */, - const cl_program * /* input_headers */, const char ** /* header_include_names */, - void(CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), - void * /* user_data */) CL_API_SUFFIX__VERSION_1_2; -typedef cl_program(CL_API_CALL *PFN_clLinkProgram)( - cl_context /* context */, cl_uint /* num_devices */, const cl_device_id * /* device_list */, - const char * /* options */, cl_uint /* num_input_programs */, - const cl_program * /* input_programs */, - void(CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */), - void * /* user_data */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; -typedef cl_int(CL_API_CALL *PFN_clUnloadPlatformCompiler)(cl_platform_id /* platform */) - CL_API_SUFFIX__VERSION_1_2; -typedef cl_int(CL_API_CALL *PFN_clGetProgramInfo)( - cl_program /* program */, cl_program_info /* param_name */, size_t /* param_value_size */, - void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clGetProgramBuildInfo)( - cl_program /* program */, cl_device_id /* device */, cl_program_build_info /* param_name */, - size_t /* param_value_size */, void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_kernel(CL_API_CALL *PFN_clCreateKernel)( - cl_program /* program */, const char * /* kernel_name */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clCreateKernelsInProgram)( - cl_program /* program */, cl_uint /* num_kernels */, cl_kernel * /* kernels */, - cl_uint * /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clRetainKernel)(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clReleaseKernel)(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clSetKernelArg)(cl_kernel /* kernel */, cl_uint /* arg_index */, - size_t /* arg_size */, const void * /* arg_value */) - CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clSetKernelArgSVMPointer)( - cl_kernel /* kernel 
*/, cl_uint /* arg_index */, - const void * /* arg_value */) CL_API_SUFFIX__VERSION_2_0; -typedef cl_int(CL_API_CALL *PFN_clSetKernelExecInfo)( - cl_kernel /* kernel */, cl_kernel_exec_info /* param_name */, size_t /* param_value_size */, - const void * /* param_value */) CL_API_SUFFIX__VERSION_2_0; -typedef cl_int(CL_API_CALL *PFN_clGetKernelInfo)( - cl_kernel /* kernel */, cl_kernel_info /* param_name */, size_t /* param_value_size */, - void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clGetKernelArgInfo)( - cl_kernel /* kernel */, cl_uint /* arg_indx */, cl_kernel_arg_info /* param_name */, - size_t /* param_value_size */, void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_2; -typedef cl_int(CL_API_CALL *PFN_clGetKernelWorkGroupInfo)( - cl_kernel /* kernel */, cl_device_id /* device */, cl_kernel_work_group_info /* param_name */, - size_t /* param_value_size */, void * /* param_value */, - size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clWaitForEvents)( - cl_uint /* num_events */, const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clGetEventInfo)( - cl_event /* event */, cl_event_info /* param_name */, size_t /* param_value_size */, - void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_event(CL_API_CALL *PFN_clCreateUserEvent)( - cl_context /* context */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; -typedef cl_int(CL_API_CALL *PFN_clRetainEvent)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clReleaseEvent)(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clSetUserEventStatus)( - cl_event /* event */, cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1; -typedef cl_int(CL_API_CALL *PFN_clSetEventCallback)( - cl_event /* event */, cl_int /* command_exec_callback_type */, - void(CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), - void * /* user_data */) CL_API_SUFFIX__VERSION_1_1; -typedef cl_int(CL_API_CALL *PFN_clGetEventProfilingInfo)( - cl_event /* event */, cl_profiling_info /* param_name */, size_t /* param_value_size */, - void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clFlush)(cl_command_queue /* command_queue */) - CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clFinish)(cl_command_queue /* command_queue */) - CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueReadBuffer)( - cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */, - size_t /* offset */, size_t /* size */, void * /* ptr */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueReadBufferRect)( - cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_read */, - const size_t * /* buffer_offset */, const size_t * /* host_offset */, const size_t * /* region */, - size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, size_t /* host_row_pitch */, - size_t /* host_slice_pitch */, void * /* ptr */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; -typedef cl_int(CL_API_CALL 
*PFN_clEnqueueWriteBuffer)( - cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */, - size_t /* offset */, size_t /* size */, const void * /* ptr */, - cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueWriteBufferRect)( - cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_write */, - const size_t * /* buffer_offset */, const size_t * /* host_offset */, const size_t * /* region */, - size_t /* buffer_row_pitch */, size_t /* buffer_slice_pitch */, size_t /* host_row_pitch */, - size_t /* host_slice_pitch */, const void * /* ptr */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; -typedef cl_int(CL_API_CALL *PFN_clEnqueueFillBuffer)( - cl_command_queue /* command_queue */, cl_mem /* buffer */, const void * /* pattern */, - size_t /* pattern_size */, size_t /* offset */, size_t /* size */, - cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; -typedef cl_int(CL_API_CALL *PFN_clEnqueueCopyBuffer)( - cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */, - size_t /* src_offset */, size_t /* dst_offset */, size_t /* size */, - cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueCopyBufferRect)( - cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_buffer */, - const size_t * /* src_origin */, const size_t * /* dst_origin */, const size_t * /* region */, - size_t /* src_row_pitch */, size_t /* src_slice_pitch */, size_t /* dst_row_pitch */, - size_t /* dst_slice_pitch */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1; -typedef cl_int(CL_API_CALL *PFN_clEnqueueReadImage)( - cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_read */, - const size_t * /* origin[3] */, const size_t * /* region[3] */, size_t /* row_pitch */, - size_t /* slice_pitch */, void * /* ptr */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueWriteImage)( - cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_write */, - const size_t * /* origin[3] */, const size_t * /* region[3] */, size_t /* input_row_pitch */, - size_t /* input_slice_pitch */, const void * /* ptr */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueFillImage)( - cl_command_queue /* command_queue */, cl_mem /* image */, const void * /* fill_color */, - const size_t * /* origin[3] */, const size_t * /* region[3] */, - cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; -typedef cl_int(CL_API_CALL *PFN_clEnqueueCopyImage)( - cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_image */, - const size_t * /* src_origin[3] */, const size_t * /* dst_origin[3] */, - const size_t * /* region[3] */, cl_uint /* num_events_in_wait_list */, - const cl_event * 
/* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueCopyImageToBuffer)( - cl_command_queue /* command_queue */, cl_mem /* src_image */, cl_mem /* dst_buffer */, - const size_t * /* src_origin[3] */, const size_t * /* region[3] */, size_t /* dst_offset */, - cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueCopyBufferToImage)( - cl_command_queue /* command_queue */, cl_mem /* src_buffer */, cl_mem /* dst_image */, - size_t /* src_offset */, const size_t * /* dst_origin[3] */, const size_t * /* region[3] */, - cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef void *(CL_API_CALL *PFN_clEnqueueMapBuffer)( - cl_command_queue /* command_queue */, cl_mem /* buffer */, cl_bool /* blocking_map */, - cl_map_flags /* map_flags */, size_t /* offset */, size_t /* size */, - cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, - cl_event * /* event */, cl_int * /* errcode_ret */)CL_API_SUFFIX__VERSION_1_0; -typedef void *(CL_API_CALL *PFN_clEnqueueMapImage)( - cl_command_queue /* command_queue */, cl_mem /* image */, cl_bool /* blocking_map */, - cl_map_flags /* map_flags */, const size_t * /* origin[3] */, const size_t * /* region[3] */, - size_t * /* image_row_pitch */, size_t * /* image_slice_pitch */, - cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, - cl_event * /* event */, cl_int * /* errcode_ret */)CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueUnmapMemObject)( - cl_command_queue /* command_queue */, cl_mem /* memobj */, void * /* mapped_ptr */, - cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueMigrateMemObjects)( - cl_command_queue /* command_queue */, cl_uint /* num_mem_objects */, - const cl_mem * /* mem_objects */, cl_mem_migration_flags /* flags */, - cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; -typedef cl_int(CL_API_CALL *PFN_clEnqueueNDRangeKernel)( - cl_command_queue /* command_queue */, cl_kernel /* kernel */, cl_uint /* work_dim */, - const size_t * /* global_work_offset */, const size_t * /* global_work_size */, - const size_t * /* local_work_size */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueNativeKernel)( - cl_command_queue /* command_queue */, void(CL_CALLBACK * /*user_func*/)(void *), - void * /* args */, size_t /* cb_args */, cl_uint /* num_mem_objects */, - const cl_mem * /* mem_list */, const void ** /* args_mem_loc */, - cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueMarkerWithWaitList)( - cl_command_queue /* command_queue */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2; -typedef cl_int(CL_API_CALL *PFN_clEnqueueBarrierWithWaitList)( - cl_command_queue /* command_queue */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* 
event */) CL_API_SUFFIX__VERSION_1_2; -typedef cl_int(CL_API_CALL *PFN_clEnqueueSVMFree)( - cl_command_queue /* command_queue */, cl_uint /* num_svm_pointers */, - void *[] /* svm_pointers[] */, - void(CL_CALLBACK * /*pfn_free_func*/)(cl_command_queue /* queue */, - cl_uint /* num_svm_pointers */, - void *[] /* svm_pointers[] */, void * /* user_data */), - void * /* user_data */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueSVMMemcpy)( - cl_command_queue /* command_queue */, cl_bool /* blocking_copy */, void * /* dst_ptr */, - const void * /* src_ptr */, size_t /* size */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueSVMMemFill)( - cl_command_queue /* command_queue */, void * /* svm_ptr */, const void * /* pattern */, - size_t /* pattern_size */, size_t /* size */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueSVMMap)( - cl_command_queue /* command_queue */, cl_bool /* blocking_map */, cl_map_flags /* flags */, - void * /* svm_ptr */, size_t /* size */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; -typedef cl_int(CL_API_CALL *PFN_clEnqueueSVMUnmap)( - cl_command_queue /* command_queue */, void * /* svm_ptr */, cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; -typedef void *(CL_API_CALL *PFN_clGetExtensionFunctionAddressForPlatform)( - cl_platform_id /* platform */, const char * /* func_name */)CL_API_SUFFIX__VERSION_1_2; -typedef cl_mem(CL_API_CALL *PFN_clCreateImage2D)(cl_context /* context */, cl_mem_flags /* flags */, - const cl_image_format * /* image_format */, - size_t /* image_width */, - size_t /* image_height */, - size_t /* image_row_pitch */, - void * /* host_ptr */, cl_int * /* errcode_ret */); -typedef cl_mem(CL_API_CALL *PFN_clCreateImage3D)( - cl_context /* context */, cl_mem_flags /* flags */, const cl_image_format * /* image_format */, - size_t /* image_width */, size_t /* image_height */, size_t /* image_depth */, - size_t /* image_row_pitch */, size_t /* image_slice_pitch */, void * /* host_ptr */, - cl_int * /* errcode_ret */); -typedef cl_int(CL_API_CALL *PFN_clEnqueueMarker)(cl_command_queue /* command_queue */, - cl_event * /* event */); -typedef cl_int(CL_API_CALL *PFN_clEnqueueWaitForEvents)(cl_command_queue /* command_queue */, - cl_uint /* num_events */, - const cl_event * /* event_list */); -typedef cl_int(CL_API_CALL *PFN_clEnqueueBarrier)(cl_command_queue /* command_queue */); -typedef cl_int(CL_API_CALL *PFN_clUnloadCompiler)(); -typedef void *(CL_API_CALL *PFN_clGetExtensionFunctionAddress)(const char * /* func_name */); -typedef cl_command_queue(CL_API_CALL *PFN_clCreateCommandQueue)( - cl_context /* context */, cl_device_id /* device */, cl_command_queue_properties /* properties */, - cl_int * /* errcode_ret */); -typedef cl_sampler(CL_API_CALL *PFN_clCreateSampler)(cl_context /* context */, - cl_bool /* normalized_coords */, - cl_addressing_mode /* addressing_mode */, - cl_filter_mode /* filter_mode */, - cl_int * /* errcode_ret */); -typedef cl_int(CL_API_CALL *PFN_clEnqueueTask)(cl_command_queue /* 
command_queue */, - cl_kernel /* kernel */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */); - -// OpenGL sharing -typedef cl_mem(CL_API_CALL *PFN_clCreateFromGLBuffer)(cl_context, cl_mem_flags, cl_GLuint, int *); -typedef cl_mem(CL_API_CALL *PFN_clCreateFromGLTexture)( - cl_context /* context */, cl_mem_flags /* flags */, cl_GLenum /* target */, - cl_GLint /* miplevel */, cl_GLuint /* texture */, - cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; -typedef cl_int(CL_API_CALL *PFN_clEnqueueAcquireGLObjects)(cl_command_queue /* command_queue */, - cl_uint /* num_objects */, - const cl_mem * /* mem_objects */, - cl_uint /* num_events_in_wait_list */, - const cl_event * /* event_wait_list */, - cl_event * /* event */); -typedef cl_int(CL_API_CALL *PFN_clEnqueueReleaseGLObjects)( - cl_command_queue /* command_queue */, cl_uint /* num_objects */, const cl_mem * /* mem_objects */, - cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, - cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; - -// cl_khr_egl_event extension - -// CLeglDisplayKHR is an opaque handle to an EGLDisplay -typedef void *CLeglDisplayKHR; - -// CLeglSyncKHR is an opaque handle to an EGLSync object -typedef void *CLeglSyncKHR; - -typedef cl_event(CL_API_CALL *PFN_clCreateEventFromEGLSyncKHR)(cl_context /* context */, - CLeglSyncKHR /* sync */, - CLeglDisplayKHR /* display */, - cl_int * /* errcode_ret */); - -// EGL sharing -typedef cl_mem(CL_API_CALL *PFN_clCreateFromEGLImageKHR)( - cl_context /*context*/, CLeglDisplayKHR /*display*/, CLeglImageKHR /*image*/, - cl_mem_flags /*flags*/, const cl_egl_image_properties_khr * /*properties*/, - cl_int * /*errcode_ret*/); -typedef cl_int(CL_API_CALL *PFN_clEnqueueAcquireEGLObjectsKHR)( - cl_command_queue /*command_queue*/, cl_uint /*num_objects*/, const cl_mem * /*mem_objects*/, - cl_uint /*num_events_in_wait_list*/, const cl_event * /*event_wait_list*/, cl_event * /*event*/); -typedef cl_int(CL_API_CALL *PFN_clEnqueueReleaseEGLObjectsKHR)( - cl_command_queue /*command_queue*/, cl_uint /*num_objects*/, const cl_mem * /*mem_objects*/, - cl_uint /*num_events_in_wait_list*/, const cl_event * /*event_wait_list*/, cl_event * /*event*/); - -extern PFN_clGetPlatformIDs clGetPlatformIDs; -extern PFN_clGetPlatformInfo clGetPlatformInfo; -extern PFN_clGetDeviceIDs clGetDeviceIDs; -extern PFN_clGetDeviceInfo clGetDeviceInfo; -extern PFN_clCreateSubDevices clCreateSubDevices; -extern PFN_clRetainDevice clRetainDevice; -extern PFN_clReleaseDevice clReleaseDevice; -extern PFN_clCreateContext clCreateContext; -extern PFN_clCreateContextFromType clCreateContextFromType; -extern PFN_clRetainContext clRetainContext; -extern PFN_clReleaseContext clReleaseContext; -extern PFN_clGetContextInfo clGetContextInfo; -extern PFN_clCreateCommandQueueWithProperties clCreateCommandQueueWithProperties; -extern PFN_clRetainCommandQueue clRetainCommandQueue; -extern PFN_clReleaseCommandQueue clReleaseCommandQueue; -extern PFN_clGetCommandQueueInfo clGetCommandQueueInfo; -extern PFN_clCreateBuffer clCreateBuffer; -extern PFN_clCreateSubBuffer clCreateSubBuffer; -extern PFN_clCreateImage clCreateImage; -extern PFN_clCreatePipe clCreatePipe; -extern PFN_clRetainMemObject clRetainMemObject; -extern PFN_clReleaseMemObject clReleaseMemObject; -extern PFN_clGetSupportedImageFormats clGetSupportedImageFormats; -extern PFN_clGetMemObjectInfo clGetMemObjectInfo; -extern PFN_clGetImageInfo clGetImageInfo; -extern PFN_clGetPipeInfo 
clGetPipeInfo; -extern PFN_clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback; -extern PFN_clSVMAlloc clSVMAlloc; -extern PFN_clSVMFree clSVMFree; -extern PFN_clCreateSamplerWithProperties clCreateSamplerWithProperties; -extern PFN_clRetainSampler clRetainSampler; -extern PFN_clReleaseSampler clReleaseSampler; -extern PFN_clGetSamplerInfo clGetSamplerInfo; -extern PFN_clCreateProgramWithSource clCreateProgramWithSource; -extern PFN_clCreateProgramWithBinary clCreateProgramWithBinary; -extern PFN_clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels; -extern PFN_clRetainProgram clRetainProgram; -extern PFN_clReleaseProgram clReleaseProgram; -extern PFN_clBuildProgram clBuildProgram; -extern PFN_clCompileProgram clCompileProgram; -extern PFN_clLinkProgram clLinkProgram; -extern PFN_clUnloadPlatformCompiler clUnloadPlatformCompiler; -extern PFN_clGetProgramInfo clGetProgramInfo; -extern PFN_clGetProgramBuildInfo clGetProgramBuildInfo; -extern PFN_clCreateKernel clCreateKernel; -extern PFN_clCreateKernelsInProgram clCreateKernelsInProgram; -extern PFN_clRetainKernel clRetainKernel; -extern PFN_clReleaseKernel clReleaseKernel; -extern PFN_clSetKernelArg clSetKernelArg; -extern PFN_clSetKernelArgSVMPointer clSetKernelArgSVMPointer; -extern PFN_clSetKernelExecInfo clSetKernelExecInfo; -extern PFN_clGetKernelInfo clGetKernelInfo; -extern PFN_clGetKernelArgInfo clGetKernelArgInfo; -extern PFN_clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo; -extern PFN_clWaitForEvents clWaitForEvents; -extern PFN_clGetEventInfo clGetEventInfo; -extern PFN_clCreateUserEvent clCreateUserEvent; -extern PFN_clRetainEvent clRetainEvent; -extern PFN_clReleaseEvent clReleaseEvent; -extern PFN_clSetUserEventStatus clSetUserEventStatus; -extern PFN_clSetEventCallback clSetEventCallback; -extern PFN_clGetEventProfilingInfo clGetEventProfilingInfo; -extern PFN_clFlush clFlush; -extern PFN_clFinish clFinish; -extern PFN_clEnqueueReadBuffer clEnqueueReadBuffer; -extern PFN_clEnqueueReadBufferRect clEnqueueReadBufferRect; -extern PFN_clEnqueueWriteBuffer clEnqueueWriteBuffer; -extern PFN_clEnqueueWriteBufferRect clEnqueueWriteBufferRect; -extern PFN_clEnqueueFillBuffer clEnqueueFillBuffer; -extern PFN_clEnqueueCopyBuffer clEnqueueCopyBuffer; -extern PFN_clEnqueueCopyBufferRect clEnqueueCopyBufferRect; -extern PFN_clEnqueueReadImage clEnqueueReadImage; -extern PFN_clEnqueueWriteImage clEnqueueWriteImage; -extern PFN_clEnqueueFillImage clEnqueueFillImage; -extern PFN_clEnqueueCopyImage clEnqueueCopyImage; -extern PFN_clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer; -extern PFN_clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage; -extern PFN_clEnqueueMapBuffer clEnqueueMapBuffer; -extern PFN_clEnqueueMapImage clEnqueueMapImage; -extern PFN_clEnqueueUnmapMemObject clEnqueueUnmapMemObject; -extern PFN_clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects; -extern PFN_clEnqueueNDRangeKernel clEnqueueNDRangeKernel; -extern PFN_clEnqueueNativeKernel clEnqueueNativeKernel; -extern PFN_clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList; -extern PFN_clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList; -extern PFN_clEnqueueSVMFree clEnqueueSVMFree; -extern PFN_clEnqueueSVMMemcpy clEnqueueSVMMemcpy; -extern PFN_clEnqueueSVMMemFill clEnqueueSVMMemFill; -extern PFN_clEnqueueSVMMap clEnqueueSVMMap; -extern PFN_clEnqueueSVMUnmap clEnqueueSVMUnmap; -extern PFN_clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform; -extern PFN_clCreateImage2D clCreateImage2D; -extern 
PFN_clCreateImage3D clCreateImage3D; -extern PFN_clEnqueueMarker clEnqueueMarker; -extern PFN_clEnqueueWaitForEvents clEnqueueWaitForEvents; -extern PFN_clEnqueueBarrier clEnqueueBarrier; -extern PFN_clUnloadCompiler clUnloadCompiler; -extern PFN_clGetExtensionFunctionAddress clGetExtensionFunctionAddress; -extern PFN_clCreateCommandQueue clCreateCommandQueue; -extern PFN_clCreateSampler clCreateSampler; -extern PFN_clEnqueueTask clEnqueueTask; - -// OpenGL sharing -extern PFN_clCreateFromGLBuffer clCreateFromGLBuffer; -extern PFN_clCreateFromGLTexture clCreateFromGLTexture; -extern PFN_clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects; -extern PFN_clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects; - -// cl_khr_egl_event extension -extern PFN_clCreateEventFromEGLSyncKHR clCreateEventFromEGLSyncKHR; - -// EGL sharing -extern PFN_clCreateFromEGLImageKHR clCreateFromEGLImageKHR; -extern PFN_clEnqueueAcquireEGLObjectsKHR clEnqueueAcquireEGLObjectsKHR; -extern PFN_clEnqueueReleaseEGLObjectsKHR clEnqueueReleaseEGLObjectsKHR; - -// For convenient image creation -// It uses clCreateImage if it is available (clCreateImage is available since cl 1.2), -// otherwise it will use the legacy clCreateImage2D -cl_mem CreateImage2DLegacy(cl_context context, cl_mem_flags flags, - const cl_image_format *image_format, const cl_image_desc *image_desc, - void *host_ptr, cl_int *errcode_ret); - -// It uses clCreateImage if it is available (clCreateImage is available since cl 1.2), -// otherwise it will use the legacy clCreateImage3D -cl_mem CreateImage3DLegacy(cl_context context, cl_mem_flags flags, - const cl_image_format *image_format, const cl_image_desc *image_desc, - void *host_ptr, cl_int *errcode_ret); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_WRAPPERE_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/Operations.cc b/runtime/onert/backend/gpu_cl/open_cl/Operations.cc deleted file mode 100644 index 2608b5364..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Operations.cc +++ /dev/null @@ -1,704 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
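A minimal sketch of what the 2D helper declared above can look like, assuming the loader leaves the clCreateImage pointer null on pre-OpenCL-1.2 platforms (the real definition lives in the wrapper's .cc file, which this hunk does not show):

cl_mem CreateImage2DLegacy(cl_context context, cl_mem_flags flags,
                           const cl_image_format *image_format, const cl_image_desc *image_desc,
                           void *host_ptr, cl_int *errcode_ret)
{
  if (clCreateImage) // Entry point exists only since OpenCL 1.2.
  {
    return clCreateImage(context, flags, image_format, image_desc, host_ptr, errcode_ret);
  }
  // Deprecated OpenCL 1.1 path: width, height and pitch come from the descriptor.
  return clCreateImage2D(context, flags, image_format, image_desc->image_width,
                         image_desc->image_height, image_desc->image_row_pitch, host_ptr,
                         errcode_ret);
}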
- */ - -#include "Operations.h" -#include "open_cl/Operations.h" - -#include <algorithm> -#include <cstdint> -#include <set> -#include <string> -#include <utility> -#include <vector> -#include <unordered_map> - -#include "absl/container/flat_hash_map.h" - -#include "Shape.h" -#include "Status.h" -#include "InternalTensor.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -Padding2D &Padding2D::operator=(const Padding2D &value) -{ - prepended = value.prepended; - appended = value.appended; - return *this; -} - -bool Padding2D::operator==(const Padding2D &value) -{ - return this->prepended == value.prepended && this->appended == value.appended; -} - -bool Padding2D::operator!=(const Padding2D &value) { return !(*this == value); } - -Padding2D &Padding2D::operator-(const Padding2D &value) -{ - prepended.h -= value.prepended.h; - prepended.w -= value.prepended.w; - appended.h -= value.appended.h; - appended.w -= value.appended.w; - return *this; -} - -Padding3D &Padding3D::operator=(const Padding3D &value) -{ - prepended = value.prepended; - appended = value.appended; - return *this; -} - -bool Padding3D::operator==(const Padding3D &value) -{ - return this->prepended == value.prepended && this->appended == value.appended; -} - -bool Padding3D::operator!=(const Padding3D &value) { return !(*this == value); } - -Padding3D &Padding3D::operator-(const Padding3D &value) -{ - prepended.h -= value.prepended.h; - prepended.w -= value.prepended.w; - prepended.d -= value.prepended.d; - appended.h -= value.appended.h; - appended.w -= value.appended.w; - appended.d -= value.appended.d; - return *this; -} - -std::string ToString(enum OperationType op) -{ - switch (op) - { - // case OperationType::ABS: - // return "abs"; - case OperationType::ADD: - return "add"; - // case OperationType::CONCAT: - // return "concat"; - // case OperationType::COS: - // return "cos"; - // case OperationType::EXP: - // return "exp"; - // case OperationType::LOG: - // return "log"; - // case OperationType::NEG: - // return "neg"; - // case OperationType::POOLING_2D: - // return "pooling_2d"; - // case OperationType::REDUCE_MAXIMUM: - // return "reduce_maximum"; - // case OperationType::REDUCE_MINIMUM: - // return "reduce_minimum"; - // case OperationType::REDUCE_PRODUCT: - // return "reduce_product"; - // case OperationType::REDUCE_SUM: - // return "reduce_sum"; - // case OperationType::RESIZE: - // return "resize"; - // case OperationType::RELU: - // return "relu"; - // case OperationType::RSQRT: - // return "rsqrt"; - // case OperationType::SQRT: - // return "sqrt"; - // case OperationType::SQUARE: - // return "square"; - case OperationType::UNKNOWN: - return "unknown_operation"; - } - return ""; -} - -OperationType OperationTypeFromString(const std::string &name) -{ - static const auto operations = new std::unordered_map<std::string, OperationType>({ - // {"abs", OperationType::ABS}, - {"add", OperationType::ADD}, - // {"concat", OperationType::CONCAT}, - // {"cos", OperationType::COS}, - // {"exp", OperationType::EXP}, - // {"log", OperationType::LOG}, - // {"neg", OperationType::NEG}, - // {"pooling_2d", OperationType::POOLING_2D}, - // {"reduce_maximum", OperationType::REDUCE_MAXIMUM}, - // {"reduce_minimum", OperationType::REDUCE_MINIMUM}, - // {"reduce_product", OperationType::REDUCE_PRODUCT}, - // {"reduce_sum", OperationType::REDUCE_SUM}, - // {"relu", OperationType::RELU}, - // {"resize", OperationType::RESIZE}, - // {"rsqrt", OperationType::RSQRT}, - // {"sqrt", OperationType::SQRT}, - // 
{"square", OperationType::SQUARE}, - }); - auto op = operations->find(name); - return op == operations->end() ? OperationType::UNKNOWN : op->second; -} - -namespace -{ - -template <typename T> T DivideRoundUp(T n, T divisor) { return (n - 1) / divisor + 1; } - -int32_t CalculateOutputSizeBeforeStrides(int32_t input, int32_t kernel, int32_t padding, - int32_t dilation) -{ - const int32_t dilated_kernel = (kernel - 1) * dilation + 1; - return input + padding - dilated_kernel + 1; -} - -template <Axis T> -int32_t CalculateOutputWithoutStrides(const BHWC &input, const Convolution2DAttributes &attr) -{ - return CalculateOutputSizeBeforeStrides( - input.get<T>(), attr.weights.shape.get<T>(), - attr.padding.prepended.get<T>() + attr.padding.appended.get<T>(), attr.dilations.get<T>()); -} - -template <Axis T> -int32_t CalculateOutputWithoutStrides(const BHWDC &input, const Convolution3DAttributes &attr) -{ - return CalculateOutputSizeBeforeStrides( - input.get<T>(), attr.weights.shape.get<T>(), - attr.padding.prepended.get<T>() + attr.padding.appended.get<T>(), attr.dilations.get<T>()); -} - -template <Axis T> -int32_t CalculateOutputWithoutStrides(const BHWC &input, const Pooling2DAttributes &attr) -{ - return CalculateOutputSizeBeforeStrides(input.get<T>(), attr.kernel.get<T>(), - attr.padding.prepended.get<T>() + - attr.padding.appended.get<T>(), - /*dilation=*/1); -} - -template <Axis T> -int32_t CalculateOutputWithoutStrides(const BHWDC &input, const Pooling3DAttributes &attr) -{ - return CalculateOutputSizeBeforeStrides(input.get<T>(), attr.kernel.get<T>(), - attr.padding.prepended.get<T>() + - attr.padding.appended.get<T>(), - /*dilation=*/1); -} - -template <Axis T> -int32_t CalculateOutput(const BHWC &input, const ConvolutionTransposedAttributes &attr) -{ - return (input.get<T>() - 1) * attr.stride.get<T>() - - (attr.padding.prepended.get<T>() + attr.padding.appended.get<T>()) + - attr.weights.shape.get<T>() + attr.adjacent.get<T>(); -} - -template <Axis T> -int32_t CalculateOutput(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr) -{ - return (input.get<T>() - 1) * attr.stride.get<T>() - - (attr.padding.prepended.get<T>() + attr.padding.appended.get<T>()) + - attr.weights.shape.get<T>(); -} - -inline int32_t StridedSize(int32_t size, int32_t stride) -{ - return stride == 0 ? -1 : DivideRoundUp(size, stride); -} - -template <Axis AxisT, typename AttrT> int32_t CalculateOutput(const BHWC &input, const AttrT &attr) -{ - return StridedSize(CalculateOutputWithoutStrides<AxisT>(input, attr), - attr.strides.template get<AxisT>()); -} - -template <Axis AxisT, typename AttrT> int32_t CalculateOutput(const BHWDC &input, const AttrT &attr) -{ - return StridedSize(CalculateOutputWithoutStrides<AxisT>(input, attr), - attr.strides.template get<AxisT>()); -} - -int32_t CalculateSamePadding(int32_t input, int32_t kernel, int32_t dilation, int32_t stride) -{ - const int32_t dilated_kernel = (kernel - 1) * dilation + 1; - return std::max(0, dilated_kernel - (input - 1) % stride - 1); -} - -// Returns a padding that should be present to make sure image size stays -// the same. -template <Axis AxisT> -int32_t CalculateSamePadding(const BHWC &input, const Convolution2DAttributes &attr) -{ - return CalculateSamePadding(input.get<AxisT>(), attr.weights.shape.get<AxisT>(), - attr.dilations.get<AxisT>(), attr.strides.get<AxisT>()); -} - -// Returns a padding that should be present to make sure image size stays -// the same. 
-template <Axis AxisT> -int32_t CalculateSamePadding(const BHWDC &input, const Convolution3DAttributes &attr) -{ - return CalculateSamePadding(input.get<AxisT>(), attr.weights.shape.get<AxisT>(), - attr.dilations.get<AxisT>(), attr.strides.get<AxisT>()); -} - -template <Axis AxisT> -int32_t CalculateSamePadding(const BHWC &input, const ConvolutionTransposedAttributes &attr) -{ - return CalculateSamePadding(input.get<AxisT>(), attr.weights.shape.get<AxisT>(), - /*dilation=*/1, attr.stride.get<AxisT>()); -} - -template <Axis AxisT> -int32_t CalculateSamePadding(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr) -{ - return CalculateSamePadding(input.get<AxisT>(), attr.weights.shape.get<AxisT>(), - /*dilation=*/1, attr.stride.get<AxisT>()); -} - -template <Axis AxisT> -int32_t CalculateSamePadding(const BHWC &input, const Pooling2DAttributes &attr) -{ - return CalculateSamePadding(input.get<AxisT>(), attr.kernel.get<AxisT>(), - /*dilation=*/1, attr.strides.get<AxisT>()); -} - -template <Axis AxisT> -int32_t CalculateSamePadding(const BHWDC &input, const Pooling3DAttributes &attr) -{ - return CalculateSamePadding(input.get<AxisT>(), attr.kernel.get<AxisT>(), - /*dilation=*/1, attr.strides.get<AxisT>()); -} - -template <Axis AxisT> -int32_t CalculateSamePadding(const BHWC &input, const MaxUnpooling2DAttributes &attr) -{ - return CalculateSamePadding(input.get<AxisT>(), attr.kernel.get<AxisT>(), - /*dilation=*/1, attr.strides.get<AxisT>()); -} - -template <Axis AxisT> -int32_t CalculateSamePadding(const BHWDC &input, const MaxUnpooling3DAttributes &attr) -{ - return CalculateSamePadding(input.get<AxisT>(), attr.kernel.get<AxisT>(), - /*dilation=*/1, attr.strides.get<AxisT>()); -} - -Padding2D MakeSamePadding(const BHWC &input, const ConvolutionTransposedAttributes &attr) -{ - int32_t padding_height = CalculateSamePadding<Axis::HEIGHT>(input, attr); - int32_t padding_width = CalculateSamePadding<Axis::WIDTH>(input, attr); - Padding2D padding; - padding.prepended = HW(padding_height / 2, padding_width / 2); - padding.appended = HW(padding_height - padding_height / 2, padding_width - padding_width / 2); - return padding; -} - -Padding3D MakeSamePadding(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr) -{ - int32_t padding_height = CalculateSamePadding<Axis::HEIGHT>(input, attr); - int32_t padding_width = CalculateSamePadding<Axis::WIDTH>(input, attr); - int32_t padding_depth = CalculateSamePadding<Axis::DEPTH>(input, attr); - Padding3D padding; - padding.prepended = HWD(padding_height / 2, padding_width / 2, padding_depth / 2); - padding.appended = HWD(padding_height - padding_height / 2, padding_width - padding_width / 2, - padding_depth - padding_depth / 2); - return padding; -} - -// If padding depends on input, convert it into fixed padding. -template <class AttrT> Padding2D MakeSamePadding(const BHWC &input, const AttrT &attr) -{ - int32_t padding_height = CalculateSamePadding<Axis::HEIGHT>(input, attr); - int32_t padding_width = CalculateSamePadding<Axis::WIDTH>(input, attr); - Padding2D padding; - padding.prepended = HW(padding_height / 2, padding_width / 2); - padding.appended = HW(padding_height - padding_height / 2, padding_width - padding_width / 2); - return padding; -} - -// If padding depends on input, convert it into fixed padding. 
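The MakeSamePadding overloads above split the total per-axis padding between the two sides; the integer halving puts the extra pixel on the appended side when the total is odd:

// With padding_height = 3: prepended = 3 / 2 = 1, appended = 3 - 1 = 2.
constexpr int32_t total_padding = 3;
constexpr int32_t prepended = total_padding / 2;
constexpr int32_t appended = total_padding - prepended;
static_assert(prepended == 1 && appended == 2, "odd padding favors the appended side");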
-template <class AttrT> Padding3D MakeSamePadding(const BHWDC &input, const AttrT &attr) -{ - int32_t padding_height = CalculateSamePadding<Axis::HEIGHT>(input, attr); - int32_t padding_width = CalculateSamePadding<Axis::WIDTH>(input, attr); - int32_t padding_depth = CalculateSamePadding<Axis::DEPTH>(input, attr); - Padding3D padding; - padding.prepended = HWD(padding_height / 2, padding_width / 2, padding_depth / 2); - padding.appended = HWD(padding_height - padding_height / 2, padding_width - padding_width / 2, - padding_depth - padding_depth / 2); - return padding; -} - -} // namespace - -BHWC CalculateOutputShape(const BHWC &input, const MaxUnpooling2DAttributes &attr) -{ - return BHWC( - input.b, input.h * attr.strides.h - attr.padding.prepended.h - attr.padding.appended.h, - input.w * attr.strides.w - attr.padding.prepended.w - attr.padding.appended.w, input.c); -} - -BHWDC CalculateOutputShape(const BHWDC &input, const MaxUnpooling3DAttributes &attr) -{ - return BHWDC( - input.b, input.h * attr.strides.h - attr.padding.prepended.h - attr.padding.appended.h, - input.w * attr.strides.w - attr.padding.prepended.w - attr.padding.appended.w, - input.d * attr.strides.d - attr.padding.prepended.d - attr.padding.appended.d, input.c); -} - -BHWC CalculateOutputShape(const BHWC &input, const Pooling2DAttributes &attr) -{ - return BHWC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr), - CalculateOutput<Axis::WIDTH>(input, attr), input.c); -} - -BHWDC CalculateOutputShape(const BHWDC &input, const Pooling3DAttributes &attr) -{ - return BHWDC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr), - CalculateOutput<Axis::WIDTH>(input, attr), CalculateOutput<Axis::DEPTH>(input, attr), - input.c); -} - -BHWC CalculateOutputShape(const BHWC &input, const Convolution2DAttributes &attr) -{ - return BHWC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr), - CalculateOutput<Axis::WIDTH>(input, attr), - attr.weights.shape.get<Axis::OUTPUT_CHANNELS>()); -} - -BHWDC CalculateOutputShape(const BHWDC &input, const Convolution3DAttributes &attr) -{ - return BHWDC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr), - CalculateOutput<Axis::WIDTH>(input, attr), CalculateOutput<Axis::DEPTH>(input, attr), - attr.weights.shape.get<Axis::OUTPUT_CHANNELS>()); -} - -BHWC CalculateOutputShape(const BHWC &input, const ConvolutionTransposedAttributes &attr) -{ - return BHWC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr), - CalculateOutput<Axis::WIDTH>(input, attr), - attr.weights.shape.get<Axis::OUTPUT_CHANNELS>()); -} - -BHWDC CalculateOutputShape(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr) -{ - return BHWDC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr), - CalculateOutput<Axis::WIDTH>(input, attr), CalculateOutput<Axis::DEPTH>(input, attr), - attr.weights.shape.get<Axis::OUTPUT_CHANNELS>()); -} - -BHWC CalculateOutputShape(const BHWC &input, const DepthwiseConvolution2DAttributes &attr) -{ - return BHWC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr), - CalculateOutput<Axis::WIDTH>(input, attr), - attr.weights.shape.get<Axis::OUTPUT_CHANNELS>() * - attr.weights.shape.get<Axis::INPUT_CHANNELS>()); -} - -BHWDC CalculateOutputShape(const BHWDC &input, const DepthwiseConvolution3DAttributes &attr) -{ - return BHWDC(input.b, CalculateOutput<Axis::HEIGHT>(input, attr), - CalculateOutput<Axis::WIDTH>(input, attr), CalculateOutput<Axis::DEPTH>(input, attr), - attr.weights.shape.get<Axis::OUTPUT_CHANNELS>() * - attr.weights.shape.get<Axis::INPUT_CHANNELS>()); -} - -BHWC 
CalculateOutputShape(const BHWC &input, const SliceAttributes &attr) -{ - (void)input; - return BHWC(StridedSize(attr.ends.b - attr.starts.b, attr.strides.b), - StridedSize(attr.ends.h - attr.starts.h, attr.strides.h), - StridedSize(attr.ends.w - attr.starts.w, attr.strides.w), - StridedSize(attr.ends.c - attr.starts.c, attr.strides.c)); -} - -BHWDC CalculateOutputShape(const BHWDC &input, const Slice3DAttributes &attr) -{ - (void)input; - return BHWDC(StridedSize(attr.ends.b - attr.starts.b, attr.strides.b), - StridedSize(attr.ends.h - attr.starts.h, attr.strides.h), - StridedSize(attr.ends.w - attr.starts.w, attr.strides.w), - StridedSize(attr.ends.d - attr.starts.d, attr.strides.d), - StridedSize(attr.ends.c - attr.starts.c, attr.strides.c)); -} - -BHWC CalculateOutputShape(const BHWC &input, const PadAttributes &attr) -{ - return BHWC( - attr.appended.b + attr.prepended.b + input.b, attr.appended.h + attr.prepended.h + input.h, - attr.appended.w + attr.prepended.w + input.w, attr.appended.c + attr.prepended.c + input.c); -} - -BHWDC CalculateOutputShape(const BHWDC &input, const Pad3DAttributes &attr) -{ - return BHWDC( - attr.appended.b + attr.prepended.b + input.b, attr.appended.h + attr.prepended.h + input.h, - attr.appended.w + attr.prepended.w + input.w, attr.appended.d + attr.prepended.d + input.d, - attr.appended.c + attr.prepended.c + input.c); -} - -BHWC CalculateOutputShape(const BHWC &input, const FullyConnectedAttributes &attr) -{ - return BHWC(input.b, 1, 1, attr.weights.shape.o); -} - -BHWC CalculateOutputShape(const BHWC &input, const MeanAttributes &attr) -{ - const int b = attr.dims.find(Axis::BATCH) == attr.dims.end() ? input.b : 1; - const int h = attr.dims.find(Axis::HEIGHT) == attr.dims.end() ? input.h : 1; - const int w = attr.dims.find(Axis::WIDTH) == attr.dims.end() ? input.w : 1; - const int c = attr.dims.find(Axis::CHANNELS) == attr.dims.end() ? 
input.c : 1; - return BHWC(b, h, w, c); -} - -absl::Status CalculateOutputShape(const std::vector<BHWC> &input, const ConcatAttributes &attr, - BHWC *output_shape) -{ - BHWC new_shape = input[0]; - switch (attr.axis) - { - case Axis::CHANNELS: - for (size_t i = 1; i < input.size(); i++) - { - if (input[i].h != new_shape.h || input[i].w != new_shape.w || input[i].b != new_shape.b) - { - return absl::InvalidArgumentError( - "Height, Width and Batch must be the same when concatenating " - "by channels axis"); - } - new_shape.c += input[i].c; - } - break; - case Axis::HEIGHT: - for (size_t i = 1; i < input.size(); i++) - { - if (input[i].w != new_shape.w || input[i].c != new_shape.c || input[i].b != new_shape.b) - { - return absl::InvalidArgumentError( - "Channels, Width and Batch must be the same when concatenating " - "by height axis"); - } - new_shape.h += input[i].h; - } - break; - case Axis::WIDTH: - for (size_t i = 1; i < input.size(); i++) - { - if (input[i].h != new_shape.h || input[i].c != new_shape.c || input[i].b != new_shape.b) - { - return absl::InvalidArgumentError( - "Height, Channels and Batch must be the same when concatenating " - "by width axis"); - } - new_shape.w += input[i].w; - } - break; - case Axis::BATCH: - for (size_t i = 1; i < input.size(); i++) - { - if (input[i].h != new_shape.h || input[i].c != new_shape.c || input[i].w != new_shape.w) - { - return absl::InvalidArgumentError( - "Width, Height and Channels must be the same when concatenating " - "by batch axis"); - } - new_shape.b += input[i].b; - } - break; - default: - return absl::InvalidArgumentError("Invalid axis"); - break; - } - *output_shape = new_shape; - return absl::OkStatus(); -} - -absl::Status CalculateOutputShape(const std::vector<BHWDC> &input, const ConcatAttributes &attr, - BHWDC *output_shape) -{ - BHWDC new_shape = input[0]; - switch (attr.axis) - { - case Axis::CHANNELS: - for (size_t i = 1; i < input.size(); ++i) - { - if (input[i].h != new_shape.h || input[i].w != new_shape.w || input[i].d != new_shape.d || - input[i].b != new_shape.b) - { - return absl::InvalidArgumentError("Height, Width, Batch and Depth must be the same when " - "concatenating " - "by channels axis"); - } - new_shape.c += input[i].c; - } - break; - case Axis::HEIGHT: - for (size_t i = 1; i < input.size(); ++i) - { - if (input[i].w != new_shape.w || input[i].c != new_shape.c || input[i].d != new_shape.d || - input[i].b != new_shape.b) - { - return absl::InvalidArgumentError( - "Width, Depth, Batch and Channels must be the same when " - "concatenating " - "by height axis"); - } - new_shape.h += input[i].h; - } - break; - case Axis::WIDTH: - for (size_t i = 1; i < input.size(); ++i) - { - if (input[i].h != new_shape.h || input[i].c != new_shape.c || input[i].d != new_shape.d || - input[i].b != new_shape.b) - { - return absl::InvalidArgumentError( - "Height, Depth, Batch and Channels must be the same when " - "concatenating " - "by width axis"); - } - new_shape.w += input[i].w; - } - break; - case Axis::DEPTH: - for (size_t i = 1; i < input.size(); ++i) - { - if (input[i].w != new_shape.w || input[i].h != new_shape.h || input[i].c != new_shape.c || - input[i].b != new_shape.b) - { - return absl::InvalidArgumentError( - "Width, Height, Batch and Channels must be the same when " - "concatenating " - "by depth axis"); - } - new_shape.d += input[i].d; - } - break; - case Axis::BATCH: - for (size_t i = 1; i < input.size(); ++i) - { - if (input[i].w != new_shape.w || input[i].h != new_shape.h || input[i].c != new_shape.c || - 
input[i].d != new_shape.d) - { - return absl::InvalidArgumentError( - "Width, Height, Depth and Channels must be the same when " - "concatenating " - "by batch axis"); - } - new_shape.b += input[i].b; - } - break; - default: - return absl::InvalidArgumentError("Invalid axis"); - } - *output_shape = new_shape; - return absl::OkStatus(); -} - -Padding2D CalculateSamePadding(const BHWC &input, const Convolution2DAttributes &attr) -{ - return MakeSamePadding(input, attr); -} - -Padding3D CalculateSamePadding(const BHWDC &input, const Convolution3DAttributes &attr) -{ - return MakeSamePadding(input, attr); -} - -Padding2D CalculateSamePadding(const BHWC &input, const ConvolutionTransposedAttributes &attr) -{ - return MakeSamePadding(input, attr); -} - -Padding3D CalculateSamePadding(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr) -{ - return MakeSamePadding(input, attr); -} - -Padding2D CalculateSamePadding(const BHWC &input, const DepthwiseConvolution2DAttributes &attr) -{ - return MakeSamePadding(input, attr); -} - -Padding3D CalculateSamePadding(const BHWDC &input, const DepthwiseConvolution3DAttributes &attr) -{ - return MakeSamePadding(input, attr); -} - -Padding2D CalculateSamePadding(const BHWC &input, const Pooling2DAttributes &attr) -{ - return MakeSamePadding(input, attr); -} - -Padding3D CalculateSamePadding(const BHWDC &input, const Pooling3DAttributes &attr) -{ - return MakeSamePadding(input, attr); -} - -Padding2D CalculateSamePadding(const BHWC &input, const MaxUnpooling2DAttributes &attr) -{ - return MakeSamePadding(input, attr); -} - -Padding3D CalculateSamePadding(const BHWDC &input, const MaxUnpooling3DAttributes &attr) -{ - return MakeSamePadding(input, attr); -} - -float CalculateResizeScale(int32_t input_size, int32_t output_size, const Resize2DAttributes &attr) -{ - return attr.align_corners && input_size > 1 && output_size > 1 - ? static_cast<float>(input_size - 1) / (output_size - 1) - : static_cast<float>(input_size) / output_size; -} - -float CalculateResizeScale(int32_t input_size, int32_t output_size, const Resize3DAttributes &attr) -{ - return attr.align_corners && input_size > 1 && output_size > 1 - ? static_cast<float>(input_size - 1) / (output_size - 1) - : static_cast<float>(input_size) / output_size; -} - -BHWC CalculateOutputShape(const BHWC &input, const Resize2DAttributes &attr) -{ - return BHWC(input.b, attr.new_shape.h, attr.new_shape.w, input.c); -} - -BHWDC CalculateOutputShape(const BHWDC &input, const Resize3DAttributes &attr) -{ - return BHWDC(input.b, attr.new_shape.h, attr.new_shape.w, attr.new_shape.d, input.c); -} - -BHWC CalculateOutputShape(const BHWC &input, const TransposeAttributes &attr) -{ - return BHWC(input.get(attr.perm.b), input.get(attr.perm.h), input.get(attr.perm.w), - input.get(attr.perm.c)); -} - -BHWDC CalculateOutputShape(const BHWDC &input, const Transpose3DAttributes &attr) -{ - return BHWDC(input.get(attr.perm.b), input.get(attr.perm.h), input.get(attr.perm.w), - input.get(attr.perm.d), input.get(attr.perm.c)); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/Operations.h b/runtime/onert/backend/gpu_cl/open_cl/Operations.h deleted file mode 100644 index 825eb90a4..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Operations.h +++ /dev/null @@ -1,586 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
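CalculateResizeScale, defined a few lines above for both 2D and 3D attributes, picks between two formulas; a numeric illustration, assuming the Resize2DAttributes type from Operations.h:

// Upscaling a 4-pixel axis to 8 pixels:
Resize2DAttributes attr;
attr.align_corners = true;
float corner_scale = CalculateResizeScale(4, 8, attr); // (4 - 1) / (8 - 1) = 3/7: corner centers map onto corner centers
attr.align_corners = false;
float plain_scale = CalculateResizeScale(4, 8, attr);  // 4 / 8 = 0.5
// align_corners only takes effect when both sizes are greater than 1.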
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_OPERATIONS_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_OPERATIONS_H__ - -#include <cstdint> -#include <set> -#include <string> -#include <vector> - -#include "absl/types/variant.h" - -#include "DataType.h" -#include "Shape.h" -#include "Status.h" -#include "InternalTensor.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -enum class OperationType -{ - UNKNOWN = 0, - // ABS, - ADD, - // BATCH_TO_SPACE, - // BATCH_NORMALIZATION, - // BATCHED_MATMUL, - // CONCAT, - // CONST, - // CONVOLUTION_2D, - // CONVOLUTION_TRANSPOSED, - // COPY, - // COS, - // DEPTHWISE_CONVOLUTION, - // DIV, - // ELU, - // EQUAL, - // EXP, - // FULLY_CONNECTED, - // GREATER, - // GREATER_EQUAL, - // HARD_SWISH, - // LESS, - // LESS_EQUAL, - // LOG, - // LSTM, - // MAXIMUM, - // MAX_UNPOOLING_2D, - // MEAN, - // MEAN_STDDEV_NORMALIZATION, - // MINIMUM, - // MUL, - // NEG, - // NOT_EQUAL, - // PAD, - // POOLING_2D, - // POW, - // PRELU, - // Used to accurately run inference on quantized models. - // QUANTIZE_AND_DEQUANTIZE, - // REDUCE_MAXIMUM, - // REDUCE_MINIMUM, - // REDUCE_PRODUCT, - // REDUCE_SUM, - // RELU, - // RESHAPE, - // RESIZE, - // RSQRT, - // SIGMOID, - // SIN, - // SLICE, - // SOFTMAX, - // SPACE_TO_BATCH, - // SPACE_TO_DEPTH, - // SQRT, - // SQUARE, - // SQUARED_DIFF, - // SUB, - // TANH, - // TRANSPOSE, -}; - -std::string ToString(enum OperationType op); - -OperationType OperationTypeFromString(const std::string &name); - -typedef absl::variant<absl::monostate, InternalTensor<HWC, DataType::FLOAT32>, - InternalTensor<Linear, DataType::FLOAT32>, float> - TensorOrScalar; - -struct Padding2D -{ - Padding2D() = default; - Padding2D(const Padding2D &); - Padding2D &operator=(const Padding2D &value); - bool operator==(const Padding2D &value); - bool operator!=(const Padding2D &value); - Padding2D &operator-(const Padding2D &value); - - // Padding values for every axis (if needed), where 'prepended' defines - // padding for the beginning of each axis and 'appended' represents end part - // of the corresponding axis. - HW prepended = HW(-1, -1); - HW appended = HW(-1, -1); -}; - -struct Padding3D -{ - Padding3D() = default; - Padding3D(const Padding3D &); - Padding3D &operator=(const Padding3D &value); - bool operator==(const Padding3D &value); - bool operator!=(const Padding3D &value); - Padding3D &operator-(const Padding3D &value); - // Padding values for every axis (if needed), where 'prepended' defines - // padding for the beginning of each axis and 'appended' represents end part - // of the corresponding axis. 
- HWD prepended = HWD(0, 0, 0); - HWD appended = HWD(0, 0, 0); -}; - -struct Crop2D : public Padding2D -{ -}; - -struct SpaceToBatchAttributes -{ - HW block; - Padding2D padding; -}; - -struct BatchToSpaceAttributes -{ - HW block; - Crop2D crop; -}; - -enum class PoolingType -{ - UNDEFINED = 0, - - // average pooling - AVERAGE = 1, - - // max pooling - MAX = 2, -}; - -struct Pooling2DAttributes -{ - PoolingType type = PoolingType::UNDEFINED; - // Strides for every axis. - HW strides = HW(-1, -1); - HW kernel = HW(-1, -1); - Padding2D padding; - // NOTE(akulik): technically the number of outputs from Pooling node indicates - // whether indices are needed or not, but I decided to keep it inside - // attributes to simplify processing. - bool output_indices = false; -}; - -struct Pooling3DAttributes -{ - PoolingType type = PoolingType::UNDEFINED; - // Strides for every axis. - HWD strides = HWD(0, 0, 0); - HWD kernel = HWD(0, 0, 0); - Padding3D padding; - // NOTE(akulik): technically the number of outputs from Pooling node indicates - // whether indices are needed or not, but I decided to keep it inside - // attributes to simplify processing. - bool output_indices = false; -}; - -struct MaxUnpooling2DAttributes -{ - // Strides for every axis. - HW strides = HW(-1, -1); - HW kernel = HW(-1, -1); - Padding2D padding; -}; - -struct MaxUnpooling3DAttributes -{ - // Strides for every axis. - HWD strides = HWD(0, 0, 0); - HWD kernel = HWD(0, 0, 0); - Padding3D padding; -}; - -struct MeanAttributes -{ - // The vector of dimensions to calculate mean along. - std::set<Axis> dims; -}; - -struct ConcatAttributes -{ - // Defines the axis to concatenate along. - Axis axis = Axis::UNKNOWN; -}; - -// @return shape of a tensor after MaxUnpooling2D operation is applied to -// the given input. -BHWC CalculateOutputShape(const BHWC &input, const MaxUnpooling2DAttributes &attr); - -// @return shape of a tensor after MaxUnpooling3D operation is applied to -// the given input. -BHWDC CalculateOutputShape(const BHWDC &input, const MaxUnpooling3DAttributes &attr); - -// @return shape of a tensor after Pooling2D operation is applied to the given -// input. -BHWC CalculateOutputShape(const BHWC &input, const Pooling2DAttributes &attr); - -// @return shape of a tensor after Pooling3D operation is applied to the given -// input. -BHWDC CalculateOutputShape(const BHWDC &input, const Pooling3DAttributes &attr); - -// @return shape of a tensor after Concat operation is applied to the given -// input. -absl::Status CalculateOutputShape(const std::vector<BHWC> &input, const ConcatAttributes &attr, - BHWC *output_shape); - -// @return shape of a tensor after Concat operation is applied to the given -// input. -absl::Status CalculateOutputShape(const std::vector<BHWDC> &input, const ConcatAttributes &attr, - BHWDC *output_shape); - -// @return padding for pooling operation to make sure the output keeps the same shape -// as the given input. -Padding2D CalculateSamePadding(const BHWC &input, const Pooling2DAttributes &attr); - -// @return padding for pooling operation to make sure the output keeps the same shape -// as the given input. -Padding3D CalculateSamePadding(const BHWDC &input, const Pooling3DAttributes &attr); - -// @return padding for max unpooling operation to make sure the output keeps the same -// shape as the given input. -Padding2D CalculateSamePadding(const BHWC &input, const MaxUnpooling2DAttributes &attr); - -// @return padding for max unpooling operation to make sure the output keeps the same -// shape as the given input. 
-Padding3D CalculateSamePadding(const BHWDC &input, const MaxUnpooling3DAttributes &attr); - -struct Convolution2DAttributes -{ - HW strides = HW(1, 1); // Along each axis. - HW dilations = HW(1, 1); // Along each axis. - Padding2D padding; - - InternalTensor<OHWI, DataType::FLOAT32> weights; - InternalTensor<Linear, DataType::FLOAT32> bias; // optional -}; - -struct Convolution3DAttributes -{ - HWD strides = HWD(0, 0, 0); // Along each axis. - HWD dilations = HWD(0, 0, 0); // Along each axis. - Padding3D padding; - - InternalTensor<OHWDI, DataType::FLOAT32> weights; - InternalTensor<Linear, DataType::FLOAT32> bias; // optional -}; - -// @return shape of a tensor after Convolution2D operation is applied to -// the given input. -BHWC CalculateOutputShape(const BHWC &input, const Convolution2DAttributes &attr); - -// @return shape of a tensor after Convolution3D operation is applied to -// the given input. -BHWDC CalculateOutputShape(const BHWDC &input, const Convolution3DAttributes &attr); - -// @return padding for convolution operation to make sure the output keeps the same -// shape as the given input. -Padding2D CalculateSamePadding(const BHWC &input, const Convolution2DAttributes &attr); - -// @return padding for convolution operation to make sure the output keeps the same -// shape as the given input. -Padding3D CalculateSamePadding(const BHWDC &input, const Convolution3DAttributes &attr); - -struct ConvolutionTransposedAttributes -{ - HW stride = HW(1, 1); // Along each axis. - HW adjacent; // TODO(sorokin): No op on Flow. - Padding2D padding; - - InternalTensor<OHWI, DataType::FLOAT32> weights; - InternalTensor<Linear, DataType::FLOAT32> bias; // optional -}; - -struct ConvolutionTransposed3DAttributes -{ - HWD stride = HWD(0, 0, 0); // Along each axis. - Padding3D padding; - - InternalTensor<OHWDI, DataType::FLOAT32> weights; - InternalTensor<Linear, DataType::FLOAT32> bias; // optional -}; - -Padding2D CalculateSamePadding(const BHWC &input, const ConvolutionTransposedAttributes &attr); - -Padding3D CalculateSamePadding(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr); - -// @return shape of a tensor after ConvolutionTransposed operation is applied to -// the given input. -BHWC CalculateOutputShape(const BHWC &input, const ConvolutionTransposedAttributes &attr); - -// @return shape of a tensor after ConvolutionTransposed3D operation is applied to -// the given -// input. -BHWDC CalculateOutputShape(const BHWDC &input, const ConvolutionTransposed3DAttributes &attr); - -struct DepthwiseConvolution2DAttributes : public Convolution2DAttributes -{ -}; -struct DepthwiseConvolution3DAttributes : public Convolution3DAttributes -{ -}; - -// @return shape of a tensor after DepthwiseConvolution2D operation is applied -// to the given input. -BHWC CalculateOutputShape(const BHWC &input, const DepthwiseConvolution2DAttributes &attr); - -// @return shape of a tensor after DepthwiseConvolution3D operation is applied -// to the given input. -BHWDC CalculateOutputShape(const BHWDC &input, const DepthwiseConvolution3DAttributes &attr); - -// @return padding for depthwise convolution operation to make sure the output keeps -// the same shape as the given input. -Padding2D CalculateSamePadding(const BHWC &input, const DepthwiseConvolution2DAttributes &attr); - -// @return padding for depthwise convolution operation to make sure the output keeps -// the same shape as the given input. 
-Padding3D CalculateSamePadding(const BHWDC &input, const DepthwiseConvolution3DAttributes &attr); - -// f(x):= { -// if x < 0 : x -> alpha * x -// if x >= 0 : x -> min(clip, x) -// } -// -// Examples: -// - ReLU: clip = 0, alpha = 0 -// - ReLU6: clip = 6, alpha = 0 -// - Leaky ReLU: clip = 0, alpha = a -struct ReLUAttributes -{ - // clip <= 0 means it is not set. - float clip = 0; - - float alpha = 0; -}; - -struct PReLUAttributes -{ - // clip <= 0 means it is not set. - float clip = 0; - - // If alpha is linear, then it is sharded across the CHANNELS axis, otherwise - // full shape alpha is required. - absl::variant<InternalTensor<Linear, DataType::FLOAT32>, InternalTensor<HWC, DataType::FLOAT32>> - alpha; -}; - -struct ReduceAttributes -{ - Axis axis = Axis::UNKNOWN; -}; - -struct SoftmaxAttributes -{ - Axis axis = Axis::UNKNOWN; -}; - -enum LstmKernelType -{ - FULL = 0, - BASIC = 1, // Currently, only basic is supported. -}; - -struct LstmAttributes -{ - LstmKernelType kernel_type = LstmKernelType::BASIC; -}; - -enum class SamplingType -{ - UNKNOWN = 0, - NEAREST = 1, - BILINEAR = 2, -}; - -struct Resize2DAttributes -{ - HW new_shape; - - SamplingType type = SamplingType::UNKNOWN; - - // If true, the centers of the 4 corner pixels of the input and output tensors - // are aligned, preserving the values at the corner pixels. Defaults to false. - bool align_corners = false; - - bool half_pixel_centers = false; -}; - -// TODO(b/147771327): rename to Resize3D -struct Resize3DAttributes -{ - HWD new_shape; - - SamplingType type = SamplingType::NEAREST; - - // If true, the centers of the 8 corner pixels of the input and output tensors - // are aligned, preserving the values at the corner pixels. Defaults to false. - bool align_corners = false; - - bool half_pixel_centers = false; -}; - -float CalculateResizeScale(int32_t input_size, int32_t output_size, const Resize2DAttributes &attr); - -float CalculateResizeScale(int32_t input_size, int32_t output_size, const Resize3DAttributes &attr); - -// @return shape of a tensor after scale operation is applied to the given -// input. -BHWC CalculateOutputShape(const BHWC &input, const Resize2DAttributes &attr); - -// @return shape of a tensor after scale operation is applied to the given -// input. -BHWDC CalculateOutputShape(const BHWDC &input, const Resize3DAttributes &attr); - -enum class PaddingContentType -{ - ZEROS = 0, - REFLECT = 1, - EDGE = 2, -}; - -struct PadAttributes -{ - PaddingContentType type = PaddingContentType::ZEROS; - - BHWC prepended; - BHWC appended; -}; - -// @return shape of a tensor after Pad operation is applied to the given input. -BHWC CalculateOutputShape(const BHWC &input, const PadAttributes &attr); - -struct Pad3DAttributes -{ - PaddingContentType type = PaddingContentType::ZEROS; - - BHWDC prepended; - BHWDC appended; -}; - -// @return shape of a tensor after Pad3D operation is applied to the given -// input. -BHWDC CalculateOutputShape(const BHWDC &input, const Pad3DAttributes &attr); - -struct ConstTensorAttributes -{ - InternalTensor<BHWC, DataType::FLOAT32> tensor; -}; - -// Simple slicing without advanced support for shrinking, reverse slicing etc. -struct SliceAttributes -{ - // Specifies start and end dimensions for slicing. - BHWC starts; - BHWC ends; - - // Stride should be >= 1. - BHWC strides; -}; - -// @return shape of a tensor after Slice2D operation is applied to the given -// input. 
-BHWC CalculateOutputShape(const BHWC &input, const SliceAttributes &attr); - -// Simple slicing without advanced support for shrinking, reverse slicing etc. -struct Slice3DAttributes -{ - // Specifies start and end dimensions for slicing. - BHWDC starts; - BHWDC ends; - - // Stride should be >= 1. - BHWDC strides; -}; - -// @return shape of a tensor after Slice3D operation is applied to the given -// input. -BHWDC CalculateOutputShape(const BHWDC &input, const Slice3DAttributes &attr); - -struct FullyConnectedAttributes -{ - InternalTensor<OHWI, DataType::FLOAT32> weights; - InternalTensor<Linear, DataType::FLOAT32> bias; -}; - -// @return shape of a tensor after FullyConnected operation is applied to -// the given input. -BHWC CalculateOutputShape(const BHWC &input, const FullyConnectedAttributes &attr); - -// @return shape of a tensor after Mean operation is applied to the given input. -BHWC CalculateOutputShape(const BHWC &input, const MeanAttributes &attr); - -struct ElementwiseAttributes -{ - TensorOrScalar param; - // For an elementwise operation with 2 inputs op(A, B), runtime_tensor_is_second is - // true when the runtime tensor is B (in the second position). This is important for - // ops that are non-commutative, for example subtract. - bool runtime_tensor_is_second = false; -}; - -struct ReshapeAttributes -{ - BHWC new_shape; -}; - -struct Reshape3DAttributes -{ - BHWDC new_shape; -}; - -struct TransposeAttributes -{ - // A permutation of the dimensions of the input tensor - BHWC perm; -}; - -// @return shape of a tensor after Transpose operation is applied to -// the given input. -BHWC CalculateOutputShape(const BHWC &input, const TransposeAttributes &attr); - -struct Transpose3DAttributes -{ - // A permutation of the dimensions of the input tensor - BHWDC perm; -}; - -// @return shape of a tensor after Transpose3D operation is applied to -// the given input. -BHWDC CalculateOutputShape(const BHWDC &input, const Transpose3DAttributes &attr); - -struct SpaceToDepthAttributes -{ - int block_size; -}; - -// These help perform a combination of Quantize & Dequantize to adjust float -// values like quantized inference would. -struct QuantizeAndDequantizeAttributes -{ - float min = 0; - float max = 0; - float scale = 0; -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_OPERATIONS_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/Precision.cc b/runtime/onert/backend/gpu_cl/open_cl/Precision.cc deleted file mode 100644 index bd908bd43..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Precision.cc +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
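The Slice output shape declared above is plain strided arithmetic, StridedSize(ends - starts, strides) per axis; a short sketch assuming BHWC's (b, h, w, c) constructor:

SliceAttributes attr;
attr.starts = BHWC(0, 0, 0, 0);
attr.ends = BHWC(1, 5, 8, 4);
attr.strides = BHWC(1, 2, 2, 1);
// h keeps indices {0, 2, 4} and w keeps {0, 2, 4, 6}:
// StridedSize(5, 2) = 3, StridedSize(8, 2) = 4 -> output is BHWC(1, 3, 4, 4).
BHWC out = CalculateOutputShape(BHWC(1, 5, 8, 4), attr); // the input argument itself is unused here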
- */ - -#include "Precision.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -std::string ToString(CalculationsPrecision precision) -{ - switch (precision) - { - case CalculationsPrecision::F32_F16: - return "CalculationsPrecision::F32_F16"; - case CalculationsPrecision::F32: - return "CalculationsPrecision::F32"; - case CalculationsPrecision::F16: - return "CalculationsPrecision::F16"; - } - return " "; -} - -DataType DeduceDataTypeFromPrecision(CalculationsPrecision precision) -{ - if (precision == CalculationsPrecision::F32) - { - return DataType::FLOAT32; - } - else - { - return DataType::FLOAT16; - } - return DataType::UNKNOWN; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/Precision.h b/runtime/onert/backend/gpu_cl/open_cl/Precision.h deleted file mode 100644 index cb910c783..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Precision.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_PRECISION_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_PRECISION_H__ - -#include <string> - -#include "DataType.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -enum class CalculationsPrecision -{ - F32, - F32_F16, - F16 -}; -// F32 - all data and all math ops in F32 -// F16 - all data and all math ops in F16 -// F32_F16 - as F16, but some operations (Convolution, -// DepthwiseConvolution, FullyConnected, ConvolutionTransposed) -// have an accumulator in F32; usually it calculates 4 mads in F16, sums them, -// then converts this partial sum to F32 and adds it to the accumulator. - -DataType DeduceDataTypeFromPrecision(CalculationsPrecision precision); - -std::string ToString(CalculationsPrecision precision); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_PRECISION_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/ProgramCache.cc b/runtime/onert/backend/gpu_cl/open_cl/ProgramCache.cc deleted file mode 100644 index 350d7a1c5..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ProgramCache.cc +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
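Given DeduceDataTypeFromPrecision above, only full F32 keeps float storage; mixed F32_F16 still stores tensors in half precision and differs only in how the listed ops accumulate. A quick check of that mapping (using <cassert>):

assert(DeduceDataTypeFromPrecision(CalculationsPrecision::F32) == DataType::FLOAT32);
assert(DeduceDataTypeFromPrecision(CalculationsPrecision::F32_F16) == DataType::FLOAT16);
assert(DeduceDataTypeFromPrecision(CalculationsPrecision::F16) == DataType::FLOAT16);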
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ProgramCache.h" - -#include <cstdint> -#include <string> - -#include "ClProgram.h" -#include "Status.h" -#include "Util.h" -#include "farmhash.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -ProgramCache::ProgramDescriptor::ProgramDescriptor(const std::string &code_text, - const std::string &options, - bool use_fingerprints) - : code(code_text), compiler_options(options), use_fingerprint(use_fingerprints) -{ - const uint64_t code_fingerprint = ::util::Fingerprint64(code); - const uint64_t options_fingerprint = ::util::Fingerprint64(compiler_options); - fingerprint = code_fingerprint + options_fingerprint; -} - -ProgramCache::ProgramDescriptor::ProgramDescriptor(uint64_t fingerprints) - : fingerprint(fingerprints), use_fingerprint(true) -{ -} - -ProgramCache::ProgramCache(ProgramCache &&program_cache) - : use_fingerprints_(program_cache.use_fingerprints_), - programs_(std::move(program_cache.programs_)) -{ -} - -ProgramCache &ProgramCache::operator=(ProgramCache &&program_cache) -{ - if (this != &program_cache) - { - use_fingerprints_ = program_cache.use_fingerprints_; - programs_ = std::move(program_cache.programs_); - } - return *this; -} - -absl::Status ProgramCache::GetOrCreateCLKernel(const std::string &code, - const std::string &function_name, - const std::vector<CompilerOptions> &compiler_options, - const CLContext &context, const CLDevice &device, - CLKernel *result) -{ - const std::string options = CompilerOptionsToString(device, compiler_options); - ProgramDescriptor desc{code, options, use_fingerprints_}; - auto it = programs_.find(desc); - if (it != programs_.end()) - { - return result->CreateFromProgram(it->second, function_name); - } - - CLProgram program; - RETURN_IF_ERROR(CreateCLProgram(code, options, context, device, &program)); - RETURN_IF_ERROR(result->CreateFromProgram(program, function_name)); - programs_.insert(std::make_pair(std::move(desc), std::move(program))); - return absl::OkStatus(); -} - -absl::Status ProgramCache::GetOrCreateCLKernel(const std::string &code, - const std::string &function_name, - const CLContext &context, const CLDevice &device, - CLKernel *result) -{ - return GetOrCreateCLKernel(code, function_name, {}, context, device, result); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/ProgramCache.h b/runtime/onert/backend/gpu_cl/open_cl/ProgramCache.h deleted file mode 100644 index 3f5ee0215..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ProgramCache.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_PROGRAM_CACHE_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_PROGRAM_CACHE_H__ - -#include <cstdint> -#include <string> -#include <vector> - -#include "absl/container/flat_hash_map.h" -#include "absl/types/span.h" -#include "ClContext.h" -#include "ClDevice.h" -#include "ClKernel.h" -#include "ClProgram.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -class ProgramCache -{ -public: - ProgramCache() = default; - - ProgramCache(ProgramCache &&program_cache); - ProgramCache &operator=(ProgramCache &&program_cache); - ProgramCache(const ProgramCache &) = delete; - ProgramCache &operator=(const ProgramCache &) = delete; - - absl::Status GetOrCreateCLKernel(const std::string &code, const std::string &function_name, - const std::vector<CompilerOptions> &compiler_options, - const CLContext &context, const CLDevice &device, - CLKernel *result); - - absl::Status GetOrCreateCLKernel(const std::string &code, const std::string &function_name, - const CLContext &context, const CLDevice &device, - CLKernel *result); - -private: - struct ProgramDescriptor - { - ProgramDescriptor() = default; - ProgramDescriptor(const std::string &code_text, const std::string &options, - bool use_fingerprint); - explicit ProgramDescriptor(uint64_t fingerprint); - - std::string code; - std::string compiler_options; - uint64_t fingerprint; - bool use_fingerprint; - }; - struct ProgramDescriptorHasher - { - std::size_t operator()(const ProgramDescriptor &k) const - { - if (k.use_fingerprint) - { - return std::hash<uint64_t>()(k.fingerprint); - } - else - { - return std::hash<std::string>()(k.code) + std::hash<std::string>()(k.compiler_options); - } - } - }; - struct ProgramDescriptorEqual - { - bool operator()(const ProgramDescriptor &a, const ProgramDescriptor &b) const - { - if (a.use_fingerprint && b.use_fingerprint) - { - return a.fingerprint == b.fingerprint; - } - else - { - return a.compiler_options == b.compiler_options && a.code == b.code; - } - } - }; - - // There is a low probability of a hash collision when cache is deserialized - // because only fingerprints are serialized instead of full source code. - bool use_fingerprints_ = false; - absl::flat_hash_map<ProgramDescriptor, CLProgram, ProgramDescriptorHasher, ProgramDescriptorEqual> - programs_; -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_PROGRAM_CACHE_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/Shape.cc b/runtime/onert/backend/gpu_cl/open_cl/Shape.cc deleted file mode 100644 index 5a2374516..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Shape.cc +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
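A minimal usage sketch for the cache declared above; the kernel source string, entry-point name, and the context/device objects are illustrative placeholders:

ProgramCache cache;
CLKernel kernel;
// The first call compiles and inserts the program; a later call with identical
// source and compiler options hits the descriptor map and only rebuilds the
// CLKernel from the cached CLProgram.
absl::Status status = cache.GetOrCreateCLKernel(kernel_source, "main_function", context, device, &kernel);
if (!status.ok())
{
  // Compilation failed; the status message carries the build error.
}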
- */
-
-#include "Shape.h"
-
-#include <stdint.h>
-
-#include <string>
-#include <vector>
-
-#include "absl/strings/str_cat.h"
-#include "absl/strings/str_join.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-namespace
-{
-
-struct GetAxisByIndexFunc
-{
- template <Layout T> Axis operator()() const { return GetAxis<T>(index); }
- int32_t index;
-};
-
-struct GetIndexByAxisFunc
-{
- template <Layout T> int operator()() const { return GetAxisIndex<T>(axis); }
- Axis axis;
-};
-
-struct NumAxisFunc
-{
- template <Layout T> int operator()() const { return Size<T>(); }
-};
-
-} // namespace
-
-std::string ToString(Axis axis)
-{
- switch (axis)
- {
- case Axis::BATCH:
- return "batch";
- case Axis::CHANNELS:
- return "channels";
- case Axis::INPUT_CHANNELS:
- return "input_channels";
- case Axis::OUTPUT_CHANNELS:
- return "output_channels";
- case Axis::HEIGHT:
- return "height";
- case Axis::WIDTH:
- return "width";
- case Axis::VALUE:
- return "value";
- case Axis::DEPTH:
- return "depth";
- case Axis::UNKNOWN:
- return "unknown";
- }
- return "undefined";
-}
-
-std::string ToString(Layout layout)
-{
- switch (layout)
- {
- case Layout::SCALAR:
- return "scalar";
- case Layout::LINEAR:
- return "linear";
- case Layout::HW:
- return "hw";
- case Layout::HWD:
- return "hwd";
- case Layout::CHW:
- return "chw";
- case Layout::HWC:
- return "hwc";
- case Layout::HWDC:
- return "hwdc";
- case Layout::OHWI:
- return "ohwi";
- case Layout::IHWO:
- return "ihwo";
- case Layout::OIHW:
- return "oihw";
- case Layout::IOHW:
- return "iohw";
- case Layout::BHWC:
- return "bhwc";
- case Layout::BHWDC:
- return "bhwdc";
- case Layout::OHWDI:
- return "ohwdi";
- case Layout::UNKNOWN:
- return "unknown";
- }
- return "undefined";
-}
-
-Axis GetAxis(Layout layout, int32_t index)
-{
- return DispatchByLayout(layout, GetAxisByIndexFunc{index});
-}
-
-int GetAxisIndex(Layout layout, Axis axis)
-{
- return DispatchByLayout(layout, GetIndexByAxisFunc{axis});
-}
-
-bool HasAxis(Layout layout, Axis axis) { return GetAxisIndex(layout, axis) >= 0; }
-
-int Size(Layout layout) { return DispatchByLayout(layout, NumAxisFunc()); }
-
-std::string ToString(const Shape &s)
-{
- return absl::StrCat("{", ToString(s.layout), ", {", absl::StrJoin(s.dimensions, ", "), "}}");
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Shape.h b/runtime/onert/backend/gpu_cl/open_cl/Shape.h
deleted file mode 100644
index 3767e106f..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/Shape.h
+++ /dev/null
@@ -1,668 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_SHAPE_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_SHAPE_H__ - -#include <stddef.h> -#include <stdint.h> - -#include <array> -#include <functional> -#include <numeric> -#include <string> -#include <utility> -#include <vector> - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -enum class Axis -{ - UNKNOWN = 0, - CHANNELS = 1, - INPUT_CHANNELS = 2, - OUTPUT_CHANNELS = 3, - HEIGHT = 4, - WIDTH = 5, - BATCH = 6, - VALUE = 7, - DEPTH = 8, -}; - -std::string ToString(Axis t); - -// Layout represents axis order. -enum class Layout -{ - UNKNOWN = 0, - SCALAR = 1, - LINEAR = 2, - HW = 3, - CHW = 4, - HWC = 5, - OIHW = 6, - OHWI = 7, - IHWO = 8, - IOHW = 9, - BHWC = 10, - HWDC = 11, - BHWDC = 12, - HWD = 13, - OHWDI = 14, -}; - -std::string ToString(Layout l); - -// Returns number of axis for the fixed layout. -template <Layout T> constexpr int Size(); - -// Returns number of axis for the given layout. -int Size(Layout layout); - -// Returns Axis for the given index and fixed layout. -template <Layout T> constexpr Axis GetAxis(int index); - -// Returns axis for the given layout and index. -Axis GetAxis(Layout layout, int32_t index); - -// Returns axis index for the given axis and fixed layout. -template <Layout T> constexpr int GetAxisIndex(Axis axis); - -// Returns axis index for the given layout and axis. -int GetAxisIndex(Layout layout, Axis axis); - -// Checks if fixed layout has given axis -template <Layout T> constexpr bool HasAxis(Axis axis); - -// Checks if given layout has given axis -bool HasAxis(Layout layout, Axis axis); - -// Stores Layout(axis set and order) and value for dimensions. -struct Shape -{ - Shape() : layout(Layout::UNKNOWN), dimensions() {} - - explicit Shape(Layout t) : layout(t), dimensions(Size(t)) {} - - Shape(Layout t, std::vector<int32_t> d) : layout(t), dimensions(std::move(d)) {} - - bool operator==(const Shape &other) const - { - return (layout == other.layout) && (dimensions == other.dimensions); - } - - bool operator!=(const Shape &other) const { return !operator==(other); } - - // All methods below are matching same methods defined in StrongShape to - // make sure generic algorithms work both ways. - - // Returns back a dimension or -1 if it is not found. - template <Axis D> int32_t get() const; - int32_t get(Axis axis) const; - - template <Axis D> bool set(int32_t t); - bool set(Axis axis, int32_t t); - - Axis axis(int index) const { return GetAxis(layout, index); } - - int index(Axis axis) const { return GetAxisIndex(layout, axis); } - - bool has(Axis axis) const { return HasAxis(layout, axis); } - - int64_t DimensionsProduct() const - { - return std::accumulate(dimensions.begin(), dimensions.end(), 1ll, std::multiplies<int64_t>()); - } - - Layout layout = Layout::UNKNOWN; - - std::vector<int32_t> dimensions; -}; - -std::string ToString(const Shape &s); - -// StrongShape provides convenient explicit access to dimensions stored in -// shape, e.g. StrongShape<Layout::HW> s; provides s.h and s.w accessors. -// -// There is a conversion possible both ways between Shape and StrongShape. -// -// OIHW oihw; // specific shape -// Shape l = oihw.ToShape(); -// -// OHWI other; // notice not the same but compatible shape. -// if (!other.Adopt(l)) { -// // error handling -// } -// -// StrongShape supports the following set of operations: -// -// // Returns number of axis in the shape class. 
-// static constexpr int size(); -// -// // Returns Axis for the given index or Axis::UNKNOWN if index -// // falls outside of the defined range in this shape. -// static constexpr Axis axis(int index); -// -// // Returns index for the given axis or -1 if axis is not defined in this -// // shape. -// static constexpr int index(Axis axis); -// -// // Getters -// int32_t get(int index) const; -// int32_t get(Axis axis) const; -// int32_t get<Axis>() const; -// -// // Setters that return false if set was not successful. -// bool set(int index, int32_t v); -// bool set(Axis axis, int32_t v); -// bool set<Axis>(int32_t v); -// -// // Returns shape's layout. -// static const Layout layout; -// -// // Turns specific shape into generic shape. -// Shape ToShape() const; -// -// // Copies all dimensions from the given shape. -// bool Adopt(const Shape&); -// -template <Layout L> struct StrongShape; - -using Scalar = StrongShape<Layout::SCALAR>; -using Linear = StrongShape<Layout::LINEAR>; -using HW = StrongShape<Layout::HW>; -using HWD = StrongShape<Layout::HWD>; - -// Common tensor shape for CNN models working with images. -using CHW = StrongShape<Layout::CHW>; -using HWC = StrongShape<Layout::HWC>; -using HWDC = StrongShape<Layout::HWDC>; -using BHWC = StrongShape<Layout::BHWC>; -using BHWDC = StrongShape<Layout::BHWDC>; - -// Tensor shape used in convolution_2d weights. -using OIHW = StrongShape<Layout::OIHW>; -using OHWI = StrongShape<Layout::OHWI>; -using IHWO = StrongShape<Layout::IHWO>; -using IOHW = StrongShape<Layout::IOHW>; - -// Tensor shape used in convolution_3d weights. -using OHWDI = StrongShape<Layout::OHWDI>; - -// ----------------------------------------------------------------------------- -// Everything below are internal implementation details. -// ----------------------------------------------------------------------------- - -namespace internal_shape -{ - -template <Axis T> struct AxisTraits; - -#define TFLITE_GPU_AXIS_TRAITS(AxisName, HolderName) \ - template <> struct AxisTraits<Axis::AxisName> \ - { \ - struct Holder \ - { \ - int32_t HolderName; \ - \ - protected: \ - int32_t operator()() const { return HolderName; } \ - void operator()(int32_t v) { HolderName = v; } \ - }; \ - \ - using dimension_holder_type = Holder; \ - } - -TFLITE_GPU_AXIS_TRAITS(CHANNELS, c); -TFLITE_GPU_AXIS_TRAITS(HEIGHT, h); -TFLITE_GPU_AXIS_TRAITS(WIDTH, w); -TFLITE_GPU_AXIS_TRAITS(INPUT_CHANNELS, i); -TFLITE_GPU_AXIS_TRAITS(OUTPUT_CHANNELS, o); -TFLITE_GPU_AXIS_TRAITS(BATCH, b); -TFLITE_GPU_AXIS_TRAITS(VALUE, v); -TFLITE_GPU_AXIS_TRAITS(DEPTH, d); - -#undef TFLITE_GPU_AXIS_TRAITS - -template <int N, Axis... As> struct StrongShapeImpl; - -template <int N> struct StrongShapeImpl<N> -{ - static constexpr int size() { return N; } - - static constexpr Axis axis(int) { return Axis::UNKNOWN; } - - static constexpr int index(Axis) { return -1; } - - static constexpr bool has(Axis) { return false; } - - int32_t get(Axis) const { return -1; } - - int32_t get(int) const { return -1; } - - template <Axis B> int32_t get() const { return -1; } - - bool set(Axis, int32_t) { return false; } - - bool set(int, int32_t) { return false; } - - template <Axis B> bool set(int32_t) { return false; } -}; - -// Used to deduce number of axis, and to be a child of a proper holder to -// provide access to the dimension by name -template <int N, Axis A, Axis... 
As> -struct StrongShapeImpl<N, A, As...> : public AxisTraits<A>::dimension_holder_type, - public StrongShapeImpl<N + 1, As...> -{ - using dimension_holder_type = typename AxisTraits<A>::dimension_holder_type; - - using rest_type = StrongShapeImpl<N + 1, As...>; - - StrongShapeImpl() : dimension_holder_type{0}, rest_type() {} - - template <typename... Ts> - explicit StrongShapeImpl(int32_t t, Ts... ts) : dimension_holder_type{t}, rest_type(ts...) - { - } - - static constexpr Axis axis(int index) { return index == N ? A : rest_type::axis(index); } - - static constexpr int index(Axis axis) { return axis == A ? N : rest_type::index(axis); } - - static constexpr bool has(Axis axis) { return axis == A ? true : rest_type::has(axis); } - - int32_t get(Axis axis) const - { - return axis == A ? dimension_holder_type::operator()() : rest_type::get(axis); - } - - template <Axis B> int32_t get() const - { - return B == A ? dimension_holder_type::operator()() : rest_type::template get<B>(); - } - - int32_t get(int index) const - { - return index == N ? dimension_holder_type::operator()() : rest_type::get(index); - } - - bool set(Axis axis, int32_t t) - { - if (axis == A) - { - dimension_holder_type::operator()(t); - return true; - } - return rest_type::set(axis, t); - } - - bool set(int index, int32_t t) - { - if (index == N) - { - dimension_holder_type::operator()(t); - return true; - } - return rest_type::set(index, t); - } - - template <Axis B> bool set(int32_t t) - { - if (A == B) - { - dimension_holder_type::operator()(t); - return true; - } - return rest_type::template set<B>(t); - } -}; - -template <Layout T> struct LayoutTraits; - -#define TFLITE_GPU_LAYOUT_TRAITS(LayoutName, ...) \ - template <> struct LayoutTraits<Layout::LayoutName> \ - { \ - using strong_shape_type = StrongShapeImpl<0, __VA_ARGS__>; \ - } - -TFLITE_GPU_LAYOUT_TRAITS(HW, Axis::HEIGHT, Axis::WIDTH); -TFLITE_GPU_LAYOUT_TRAITS(HWD, Axis::HEIGHT, Axis::WIDTH, Axis::DEPTH); -TFLITE_GPU_LAYOUT_TRAITS(OHWI, Axis::OUTPUT_CHANNELS, Axis::HEIGHT, Axis::WIDTH, - Axis::INPUT_CHANNELS); -TFLITE_GPU_LAYOUT_TRAITS(OIHW, Axis::OUTPUT_CHANNELS, Axis::INPUT_CHANNELS, Axis::HEIGHT, - Axis::WIDTH); -TFLITE_GPU_LAYOUT_TRAITS(IOHW, Axis::INPUT_CHANNELS, Axis::OUTPUT_CHANNELS, Axis::HEIGHT, - Axis::WIDTH); -TFLITE_GPU_LAYOUT_TRAITS(IHWO, Axis::INPUT_CHANNELS, Axis::HEIGHT, Axis::WIDTH, - Axis::OUTPUT_CHANNELS); -TFLITE_GPU_LAYOUT_TRAITS(CHW, Axis::CHANNELS, Axis::HEIGHT, Axis::WIDTH); -TFLITE_GPU_LAYOUT_TRAITS(HWC, Axis::HEIGHT, Axis::WIDTH, Axis::CHANNELS); -TFLITE_GPU_LAYOUT_TRAITS(HWDC, Axis::HEIGHT, Axis::WIDTH, Axis::DEPTH, Axis::CHANNELS); -TFLITE_GPU_LAYOUT_TRAITS(LINEAR, Axis::VALUE); -TFLITE_GPU_LAYOUT_TRAITS(SCALAR, Axis::VALUE); -TFLITE_GPU_LAYOUT_TRAITS(BHWC, Axis::BATCH, Axis::HEIGHT, Axis::WIDTH, Axis::CHANNELS); -TFLITE_GPU_LAYOUT_TRAITS(BHWDC, Axis::BATCH, Axis::HEIGHT, Axis::WIDTH, Axis::DEPTH, - Axis::CHANNELS); -TFLITE_GPU_LAYOUT_TRAITS(OHWDI, Axis::OUTPUT_CHANNELS, Axis::HEIGHT, Axis::WIDTH, Axis::DEPTH, - Axis::INPUT_CHANNELS); - -#undef TFLITE_GPU_LAYOUT_TRAITS - -template <> struct LayoutTraits<Layout::UNKNOWN> -{ - using strong_shape_type = StrongShapeImpl<0>; -}; - -template <Axis A> struct DimensionGetterFixedAxisFunc -{ - template <Layout T> int32_t operator()() const - { - constexpr int i = GetAxisIndex<T>(A); - return i >= 0 && i < l->dimensions.size() ? 
l->dimensions[i] : -1; - } - const Shape *l; -}; - -struct DimensionGetterFunc -{ - template <Layout T> int32_t operator()() const - { - uint32_t i = GetAxisIndex<T>(axis); - return i < l->dimensions.size() ? l->dimensions[i] : -1; - } - Axis axis; - const Shape *l; -}; - -template <Axis A> struct DimensionSetterFixedAxisFunc -{ - template <Layout T> bool operator()() const - { - constexpr uint32_t i = GetAxisIndex<T>(A); - if (i < l->dimensions.size()) - { - l->dimensions[i] = v; - return true; - } - return false; - } - Shape *l; - int32_t v; -}; - -struct DimensionSetterFunc -{ - template <Layout T> bool operator()() const - { - uint32_t i = GetAxisIndex<T>(axis); - if (i < l->dimensions.size()) - { - l->dimensions[i] = v; - return true; - } - return false; - } - Axis axis; - Shape *l; - int32_t v; -}; - -template <Layout L> struct ToShapeFunc -{ - template <Layout T> bool operator()() const - { - for (int i = 0; i < StrongShape<L>::size(); ++i) - { - int index = GetAxisIndex<T>(StrongShape<L>::axis(i)); - if (index < 0) - return false; - shape->set(i, l.dimensions[index]); - } - return true; - } - - StrongShape<L> *shape; - const Shape &l; -}; - -} // namespace internal_shape - -// template <Axis... As> -template <Layout L> struct StrongShape : public internal_shape::LayoutTraits<L>::strong_shape_type -{ - using strong_shape_type = typename internal_shape::LayoutTraits<L>::strong_shape_type; - StrongShape() = default; - - template <typename... Ts> explicit StrongShape(Ts... t) : strong_shape_type(t...) {} - - constexpr static Layout layout = L; - - bool operator==(const StrongShape<L> &shape) const - { - // TODO(akulik): implement better alternative. - return this->ToShape() == shape.ToShape(); - } - - bool operator!=(const StrongShape<L> &shape) const - { - // TODO(akulik): implement better alternative. - return this->ToShape() != shape.ToShape(); - } - bool empty() const { return DimensionsProduct() == 0; } - - // Turns StrongShape into generic shape. - Shape ToShape() const - { - std::vector<int32_t> dimensions(StrongShape::size()); - for (int i = 0; i < StrongShape::size(); ++i) - { - dimensions[i] = StrongShape::get(i); - } - return Shape(L, std::move(dimensions)); - } - - // @return all dimensions multiplied - int64_t DimensionsProduct() const - { - int64_t product = 1; - for (int i = 0; i < StrongShape::size(); ++i) - { - product *= StrongShape::get(i); - } - return product; - } - - // Translates given coordinates of the layout into a linear index assuming - // dimensions are sorted in tensor access order e.g. if you access - // foobar[i][j][k] order of coordinates should be i,j,k. - int64_t LinearIndex(const std::array<int32_t, StrongShape::size()> &coordinates) const - { - int64_t index = coordinates[0]; - for (int i = 1; i < StrongShape::size(); ++i) - { - index = index * StrongShape::get(i) + coordinates[i]; - } - return index; - } - - // Copies all dimensions from the given generic shape into specific shape. - // It requires shape to have all axis defined in the given - // StrongShape. For example: - // - If this shape is OHWI but given shape is OIHW, Adopt will copy all - // dimensions and return true. - // - If this shape is OIHW but input shape is HW, Adopt will copy H and W - // dimensions and return true, but if this shape is HW and given shape - // OIHW, then Adopt will return false because not all axis are present in - // the input shape. - // - // @return false if generic shape is not compatible. 
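-//
-// A short illustrative sketch (the layout and dimension values here are
-// invented for the example, not taken from this file):
-//
-//   Shape generic(Layout::OHWI, {8, 3, 3, 4});
-//   OHWI weights; // strongly-typed view with .o/.h/.w/.i accessors
-//   if (weights.Adopt(generic))
-//   {
-//     // weights.o == 8, weights.h == 3, weights.w == 3, weights.i == 4
-//   }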
- bool Adopt(const Shape &shape) - { - return DispatchByLayout(shape.layout, internal_shape::ToShapeFunc<L>{this, shape}); - } - - // For all axis defined in a given shape copies values to this shape. - // Therefore, it is possible to copy dimensions from CHW to BCHW, but not - // the other way around. - // - // BCHW bchw; - // CHW chw; - // bchw.CopyAllGivenAxis(chw); --> true - // chw.CopyAllGivenAxis(bchw); --> false - // - // @return false if axis in source shape is not defined here, thus value - // was not copied. - template <Layout B> bool CopyAllGivenAxis(const StrongShape<B> &source) - { - for (int i = 0; i < source.size(); ++i) - { - if (!StrongShape::set(source.axis(i), source.get(i))) - { - return false; - } - } - return true; - } - - // For all axis defined in this shape copies values from the given shape. - // - // BCHW bchw; - // CHW chw; - // bchw.CopyAllDefinedAxis(chw); --> false - // chw.CopyAllDefinedAxis(bchw); --> true - // - // @return false if given shape does not have axis defined here, - // therefore a value was not copied. - template <Layout B> bool CopyAllDefinedAxis(const StrongShape<B> &source) - { - for (int i = 0; i < StrongShape::size(); ++i) - { - int source_index = source.index(StrongShape::axis(i)); - if (source_index < 0) - { - return false; - } - StrongShape::set(i, source.get(source_index)); // always true - } - return true; - } - - // Copies values only for matching axis. - template <Layout B> void CopyMatchingAxis(const StrongShape<B> &source) - { - for (int i = 0; i < StrongShape::size(); ++i) - { - StrongShape::set(source.axis(i), source.get(i)); - } - } - - // AbslHash function for using in flat hash containers. - template <typename H> friend H AbslHashValue(H hash_state, const StrongShape &strong_shape) - { - for (size_t i = 0; i < strong_shape.size(); ++i) - { - hash_state = H::combine(std::move(hash_state), strong_shape.get(i)); - } - return hash_state; - } -}; - -template <Layout T> inline std::string ToString(const StrongShape<T> &s) -{ - return ToString(s.ToShape()); -} - -template <Layout L> constexpr Layout StrongShape<L>::layout; - -template <class F> -auto DispatchByLayout(Layout type, F f) -> decltype(f.template operator()<Layout::UNKNOWN>()) -{ - switch (type) - { - case Layout::HW: - return f.template operator()<Layout::HW>(); - case Layout::HWD: - return f.template operator()<Layout::HWD>(); - case Layout::HWC: - return f.template operator()<Layout::HWC>(); - case Layout::HWDC: - return f.template operator()<Layout::HWDC>(); - case Layout::CHW: - return f.template operator()<Layout::CHW>(); - case Layout::OIHW: - return f.template operator()<Layout::OIHW>(); - case Layout::IOHW: - return f.template operator()<Layout::IOHW>(); - case Layout::OHWI: - return f.template operator()<Layout::OHWI>(); - case Layout::IHWO: - return f.template operator()<Layout::IHWO>(); - case Layout::LINEAR: - return f.template operator()<Layout::LINEAR>(); - case Layout::SCALAR: - return f.template operator()<Layout::SCALAR>(); - case Layout::BHWC: - return f.template operator()<Layout::BHWC>(); - case Layout::BHWDC: - return f.template operator()<Layout::BHWDC>(); - case Layout::OHWDI: - return f.template operator()<Layout::OHWDI>(); - case Layout::UNKNOWN: - return f.template operator()<Layout::UNKNOWN>(); - } - return f.template operator()<Layout::UNKNOWN>(); -} - -template <Layout T> constexpr int Size() { return StrongShape<T>::size(); } - -template <Layout T> constexpr Axis GetAxis(int index) { return StrongShape<T>::axis(index); } - -template <Layout 
T> constexpr int GetAxisIndex(Axis axis) { return StrongShape<T>::index(axis); } - -template <Layout T> constexpr bool HasAxis(Axis axis) { return StrongShape<T>::has(axis); } - -template <Axis D> inline int32_t Shape::get() const -{ - return DispatchByLayout(layout, internal_shape::DimensionGetterFixedAxisFunc<D>{this}); -} - -inline int32_t Shape::get(Axis axis) const -{ - return DispatchByLayout(layout, internal_shape::DimensionGetterFunc{axis, this}); -} - -template <Axis D> inline bool Shape::set(int32_t t) -{ - return DispatchByLayout(layout, internal_shape::DimensionSetterFixedAxisFunc<D>{this, t}); -} - -inline bool Shape::set(Axis axis, int32_t t) -{ - return DispatchByLayout(layout, internal_shape::DimensionSetterFunc{axis, this, t}); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_SHAPE_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/Spi.h b/runtime/onert/backend/gpu_cl/open_cl/Spi.h deleted file mode 100644 index c1d65b67e..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Spi.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPEN_CL_SPI_H__ -#define __ONERT_BACKEND_GPU_CL_OPEN_CL_SPI_H__ - -#include <cstdint> - -#include "Api.h" -#include "AccessType.h" -#include "Status.h" - -// Contains only service provider-related interfaces. Users should not use them -// directly. - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -// Converts a tensor object into another one. -class TensorObjectConverter -{ -public: - virtual ~TensorObjectConverter() = default; - - virtual absl::Status Convert(const TensorObject &input, const TensorObject &output) = 0; -}; - -class TensorObjectConverterBuilder -{ -public: - virtual ~TensorObjectConverterBuilder() = default; - - virtual bool IsSupported(const TensorObjectDef &input, const TensorObjectDef &output) const = 0; - - virtual absl::Status MakeConverter(const TensorObjectDef &input, const TensorObjectDef &output, - std::unique_ptr<TensorObjectConverter> *converter) = 0; -}; - -// Connects tensor definition provided by a user (external) with tensor -// definition used by the inference engine (internal). -struct TensorTieDef -{ - uint32_t id; - AccessType access_type; - TensorObjectDef internal_def; - TensorObjectDef external_def; -}; - -// Connects external tensor object to internal tensor object and provides -// functionality to copy data to/from external object to internal. 
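-//
-// A plausible call sequence for a concrete implementation (a hedged sketch;
-// this header defines only the interface below):
-//
-//   RETURN_IF_ERROR(tie->SetExternalObject(user_object));
-//   RETURN_IF_ERROR(tie->CopyFromExternalObject()); // push input before a run
-//   // ... run inference ...
-//   RETURN_IF_ERROR(tie->CopyToExternalObject());   // pull output after a run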
-class TensorTie -{ -public: - explicit TensorTie(const TensorTieDef &def) : def_(def) {} - - virtual ~TensorTie() = default; - - virtual absl::Status SetExternalObject(TensorObject obj) = 0; - - virtual TensorObject GetExternalObject() = 0; - - virtual absl::Status CopyToExternalObject() = 0; - - virtual absl::Status CopyFromExternalObject() = 0; - - const TensorTieDef &def() const { return def_; } - -private: - const TensorTieDef def_; -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPEN_CL_SPI_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/Status.h b/runtime/onert/backend/gpu_cl/open_cl/Status.h deleted file mode 100644 index 6295a7e77..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Status.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_STATUS_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_STATUS_H__ - -#include "absl/status/status.h" // IWYU pragma: export -#define RETURN_IF_ERROR(s) \ - { \ - auto c = (s); \ - if (!c.ok()) \ - return c; \ - } // IWYU pragma: export - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_STATUS_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/StorageTypeUtil.cc b/runtime/onert/backend/gpu_cl/open_cl/StorageTypeUtil.cc deleted file mode 100644 index eada697ac..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/StorageTypeUtil.cc +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "StorageTypeUtil.h" - -#include "TensorType.h" -#include "DataType.h" -#include "Shape.h" -#include "Util.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -bool CanCreateTensorWithShape(const DeviceInfo &device_info, const BHWDC &shape, - const TensorDescriptor &descriptor) -{ - const int slices = DivideRoundUp(shape.c, 4); - switch (descriptor.storage_type) - { - case TensorStorageType::BUFFER: - { - const uint64_t flt4_size = 4 * (descriptor.data_type == DataType::FLOAT32 ? 
4 : 2);
- const uint64_t buffer_size = shape.b * shape.w * shape.h * shape.d * slices * flt4_size;
- return buffer_size <= device_info.buffer_max_size;
- }
- case TensorStorageType::IMAGE_BUFFER:
- return (uint64_t)shape.b * shape.w * shape.h * shape.d * slices <=
- device_info.image_buffer_max_size;
- case TensorStorageType::TEXTURE_3D:
- if (device_info.cl_version < OpenCLVersion::CL_1_2 && slices == 1)
- {
- // clCreateImage3D (used in CL 1.0/1.1) cannot create an image with
- // depth = 1 per the specification.
- return false;
- }
- return (uint64_t)shape.w * shape.b <= device_info.image3d_max_width &&
- (uint64_t)shape.h <= device_info.image3d_max_height &&
- (uint64_t)slices * shape.d <= device_info.image3d_max_depth;
- case TensorStorageType::TEXTURE_ARRAY:
- // Bug on some Adreno. b/131099086
- if (slices == 1 && !device_info.SupportsOneLayerTextureArray())
- {
- return false;
- }
- return (uint64_t)shape.w * shape.b <= device_info.image2d_max_width &&
- (uint64_t)shape.h <= device_info.image2d_max_height &&
- (uint64_t)slices * shape.d <= device_info.image_array_max_layers;
- case TensorStorageType::TEXTURE_2D:
- return (uint64_t)shape.w * shape.b * shape.d <= device_info.image2d_max_width &&
- (uint64_t)shape.h * slices <= device_info.image2d_max_height;
- case TensorStorageType::SINGLE_TEXTURE_2D:
- return (uint64_t)shape.c <= 4 &&
- device_info.SupportsFloatImage2D(descriptor.data_type, shape.c) &&
- (uint64_t)shape.w * shape.b * shape.d <= device_info.image2d_max_width &&
- (uint64_t)shape.h <= device_info.image2d_max_height;
- default:
- return false;
- }
-}
-
-bool CanCreateTensorWithShape(const DeviceInfo &device_info, const BHWC &shape,
- const TensorDescriptor &descriptor)
-{
- const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c);
- return CanCreateTensorWithShape(device_info, shape5D, descriptor);
-}
-
-TensorStorageType SelectBestStorageType(const DeviceInfo &device_info, const BHWC &shape,
- const TensorStorageType &desired, const DataType &data_type,
- const Layout &layout)
-{
- if (CanCreateTensorWithShape(device_info, shape, TensorDescriptor{data_type, desired, layout}))
- {
- return desired;
- }
- auto GetBestTypeAfterTextureArray = [&]() {
- if (device_info.SupportsImageBuffer() &&
- CanCreateTensorWithShape(
- device_info, shape, TensorDescriptor{data_type, TensorStorageType::IMAGE_BUFFER, layout}))
- {
- return TensorStorageType::IMAGE_BUFFER;
- }
- else
- {
- return TensorStorageType::BUFFER;
- }
- };
- auto GetBestTypeAfterTexture2D = [&]() {
- if (device_info.SupportsTextureArray() &&
- CanCreateTensorWithShape(
- device_info, shape,
- TensorDescriptor{data_type, TensorStorageType::TEXTURE_ARRAY, layout}))
- {
- return TensorStorageType::TEXTURE_ARRAY;
- }
- else
- {
- return GetBestTypeAfterTextureArray();
- }
- };
- auto GetBestTypeAfterTexture3D = [&]() {
- if (CanCreateTensorWithShape(
- device_info, shape, TensorDescriptor{data_type, TensorStorageType::TEXTURE_2D, layout}))
- {
- return TensorStorageType::TEXTURE_2D;
- }
- else
- {
- return GetBestTypeAfterTexture2D();
- }
- };
- switch (desired)
- {
- case TensorStorageType::TEXTURE_2D:
- case TensorStorageType::SINGLE_TEXTURE_2D:
- return GetBestTypeAfterTexture2D();
- case TensorStorageType::TEXTURE_ARRAY:
- return GetBestTypeAfterTextureArray();
- case TensorStorageType::TEXTURE_3D:
- return GetBestTypeAfterTexture3D();
- case TensorStorageType::IMAGE_BUFFER:
- case TensorStorageType::BUFFER:
- return TensorStorageType::BUFFER;
- default:
- return TensorStorageType::BUFFER;
- }
-}
-
-} //
namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/StorageTypeUtil.h b/runtime/onert/backend/gpu_cl/open_cl/StorageTypeUtil.h deleted file mode 100644 index a84c3865f..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/StorageTypeUtil.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_STORAGE_TYPE_UTIL_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_STORAGE_TYPE_UTIL_H__ - -#include "DeviceInfo.h" -#include "TensorType.h" -#include "DataType.h" -#include "Shape.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -bool CanCreateTensorWithShape(const DeviceInfo &device_info, const BHWDC &shape, - const TensorDescriptor &descriptor); - -bool CanCreateTensorWithShape(const DeviceInfo &device_info, const BHWC &shape, - const TensorDescriptor &descriptor); - -TensorStorageType SelectBestStorageType(const DeviceInfo &device_info, const BHWC &shape, - const TensorStorageType &desired, const DataType &data_type, - const Layout &layout); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_STORAGE_TYPE_UTIL_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/Tensor.cc b/runtime/onert/backend/gpu_cl/open_cl/Tensor.cc deleted file mode 100644 index 983e0d29d..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Tensor.cc +++ /dev/null @@ -1,690 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "Tensor.h" - -#include <cstring> -#include <vector> - -#include "absl/strings/str_cat.h" - -#include "Buffer.h" -#include "ClImageFormat.h" -#include "ClMemory.h" -#include "GpuObject.h" -#include "TensorType.h" -#include "InternalTensor.h" -#include "DataType.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -absl::Status AllocateTensorMemory(const CLContext &context, const BHWDC &shape, - const TensorDescriptor &descriptor, const void *data_ptr, - CLMemory *result) -{ - const int slices = DivideRoundUp(shape.c, 4); - cl_mem_flags mem_flags = CL_MEM_READ_WRITE; - if (data_ptr) - { - mem_flags |= CL_MEM_COPY_HOST_PTR; - } - switch (descriptor.storage_type) - { - case TensorStorageType::BUFFER: - case TensorStorageType::IMAGE_BUFFER: - { - const size_t data_size = - shape.b * shape.w * shape.h * shape.d * slices * 4 * SizeOf(descriptor.data_type); - cl_int error_code; - cl_mem memory = clCreateBuffer(context.context(), mem_flags, data_size, - const_cast<void *>(data_ptr), &error_code); - if (!memory) - { - return absl::UnknownError(absl::StrCat( - "Failed to allocate device memory (clCreateBuffer): ", CLErrorCodeToString(error_code))); - } - *result = CLMemory(memory, true); - return absl::OkStatus(); - } - case TensorStorageType::TEXTURE_2D: - { - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE2D; - desc.image_width = shape.w * shape.b * shape.d; - desc.image_height = shape.h * slices; - desc.image_depth = 0; - desc.image_row_pitch = 0; - desc.image_slice_pitch = 0; - desc.num_mip_levels = 0; - desc.num_samples = 0; - desc.buffer = nullptr; - - cl_image_format format; - format.image_channel_order = CL_RGBA; - format.image_channel_data_type = ToImageChannelType(descriptor.data_type); - - cl_int error_code; - cl_mem memory = CreateImage2DLegacy(context.context(), mem_flags, &format, &desc, - const_cast<void *>(data_ptr), &error_code); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to create 2D texture (clCreateImage): ", - CLErrorCodeToString(error_code))); - } - - *result = CLMemory(memory, true); - return absl::OkStatus(); - } - case TensorStorageType::TEXTURE_3D: - { - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE3D; - desc.image_width = shape.w * shape.b; - desc.image_height = shape.h; - desc.image_depth = slices * shape.d; - desc.image_row_pitch = 0; - desc.image_slice_pitch = 0; - desc.num_mip_levels = 0; - desc.num_samples = 0; - desc.buffer = nullptr; - - cl_image_format format; - format.image_channel_order = CL_RGBA; - format.image_channel_data_type = ToImageChannelType(descriptor.data_type); - - cl_int error_code; - cl_mem memory = CreateImage3DLegacy(context.context(), mem_flags, &format, &desc, - const_cast<void *>(data_ptr), &error_code); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to create 3D texture (clCreateImage): ", - CLErrorCodeToString(error_code))); - } - - *result = CLMemory(memory, true); - return absl::OkStatus(); - } - case TensorStorageType::TEXTURE_ARRAY: - { - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; - desc.image_width = shape.w * shape.b; - desc.image_height = shape.h; - desc.image_depth = 0; - desc.image_array_size = slices * shape.d; - desc.image_row_pitch = 0; - desc.image_slice_pitch = 0; - desc.num_mip_levels = 0; - desc.num_samples = 0; - desc.buffer = nullptr; - - cl_image_format format; - format.image_channel_order = CL_RGBA; - 
format.image_channel_data_type = ToImageChannelType(descriptor.data_type);
-
- cl_int error_code;
- cl_mem memory = clCreateImage(context.context(), mem_flags, &format, &desc,
- const_cast<void *>(data_ptr), &error_code);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(absl::StrCat(
- "Failed to create 2D texture array (clCreateImage): ", CLErrorCodeToString(error_code)));
- }
-
- *result = CLMemory(memory, true);
- return absl::OkStatus();
- }
-
- case TensorStorageType::SINGLE_TEXTURE_2D:
- {
- if (slices != 1)
- {
- return absl::InvalidArgumentError(absl::StrCat(
- "SINGLE_TEXTURE_2D supports only channels in range [1-4], but ", shape.c, " was provided"));
- }
- cl_image_desc desc;
- desc.image_type = CL_MEM_OBJECT_IMAGE2D;
- desc.image_width = shape.w * shape.b * shape.d;
- desc.image_height = shape.h;
- desc.image_depth = 0;
- desc.image_row_pitch = 0;
- desc.image_slice_pitch = 0;
- desc.num_mip_levels = 0;
- desc.num_samples = 0;
- desc.buffer = nullptr;
-
- cl_image_format format;
- if (context.IsFloatTexture2DSupported(shape.c, descriptor.data_type))
- {
- format.image_channel_order = ToChannelOrder(shape.c);
- format.image_channel_data_type = ToImageChannelType(descriptor.data_type);
- }
- else
- {
- return absl::InvalidArgumentError(
- absl::StrCat("This device doesn't support ", shape.c, "-channel textures."));
- }
-
- cl_int error_code;
- cl_mem memory = CreateImage2DLegacy(context.context(), mem_flags, &format, &desc,
- const_cast<void *>(data_ptr), &error_code);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(absl::StrCat(
- "Failed to create single 2D texture (clCreateImage): ", CLErrorCodeToString(error_code)));
- }
-
- *result = CLMemory(memory, true);
- return absl::OkStatus();
- }
-
- default:
- return absl::InternalError("Unsupported tensor storage type");
- }
-}
-
-absl::Status CreateImageBufferFromBuffer(const CLContext &context, cl_mem memory,
- DataType data_type, int width, cl_mem *result)
-{
- cl_image_format format;
- cl_image_desc desc;
- std::memset(&desc, 0, sizeof(desc));
- desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER;
- desc.image_width = width;
- desc.mem_object = memory;
-
- format.image_channel_data_type = ToImageChannelType(data_type);
- format.image_channel_order = CL_RGBA;
-
- cl_int error_code;
- *result =
- clCreateImage(context.context(), CL_MEM_READ_WRITE, &format, &desc, nullptr, &error_code);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(absl::StrCat("Failed to create Image from Buffer (clCreateImage): ",
- CLErrorCodeToString(error_code)));
- }
- return absl::OkStatus();
-}
-
-absl::Status CreateTensor(const CLContext &context, const BHWDC &shape,
- const TensorDescriptor &descriptor, cl_mem memory, Tensor *result)
-{
- const bool memory_owner = memory == nullptr;
- if (memory_owner)
- {
- CLMemory mem;
- RETURN_IF_ERROR(AllocateTensorMemory(context, shape, descriptor, nullptr, &mem));
- memory = mem.Release();
- }
- if (descriptor.storage_type == TensorStorageType::IMAGE_BUFFER)
- {
- cl_mem image_memory;
- RETURN_IF_ERROR(CreateImageBufferFromBuffer(
- context, memory, descriptor.data_type,
- shape.b * shape.w * shape.h * shape.d * DivideRoundUp(shape.c, 4), &image_memory));
- *result = Tensor(memory, memory_owner, image_memory, shape, descriptor);
- }
- else
- {
- *result = Tensor(memory, memory_owner, shape, descriptor);
- }
- return absl::OkStatus();
-}
-
-absl::Status CreateTensorShared(const CLContext &context, const BHWDC &shape,
- const TensorDescriptor &descriptor, cl_mem memory,
Tensor *result)
-{
- const bool memory_owner = false;
- if (descriptor.storage_type == TensorStorageType::IMAGE_BUFFER)
- {
- cl_mem image_memory;
- RETURN_IF_ERROR(CreateImageBufferFromBuffer(
- context, memory, descriptor.data_type,
- shape.b * shape.w * shape.h * shape.d * DivideRoundUp(shape.c, 4), &image_memory));
- *result = Tensor(memory, memory_owner, image_memory, shape, descriptor);
- }
- else
- {
- *result = Tensor(memory, memory_owner, shape, descriptor);
- }
- return absl::OkStatus();
-}
-
-} // namespace
-
-absl::Status TensorDescriptor::CreateGPUObject(CLContext *context, GPUObjectPtr *result) const
-{
- Tensor gpu_tensor;
- RETURN_IF_ERROR(gpu_tensor.CreateFromDescriptor(*this, context));
- *result = absl::make_unique<Tensor>(std::move(gpu_tensor));
- return absl::OkStatus();
-}
-
-Tensor::Tensor(cl_mem memory, bool memory_owner, const BHWC &shape,
- const TensorDescriptor &descriptor)
- : memory_(memory), image_buffer_memory_(nullptr), memory_owner_(memory_owner),
- shape_(shape.b, shape.h, shape.w, 1, shape.c), descriptor_(descriptor)
-{
-}
-
-Tensor::Tensor(cl_mem memory, bool memory_owner, const BHWDC &shape,
- const TensorDescriptor &descriptor)
- : memory_(memory), image_buffer_memory_(nullptr), memory_owner_(memory_owner), shape_(shape),
- descriptor_(descriptor)
-{
-}
-
-Tensor::Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory, const BHWC &shape,
- const TensorDescriptor &descriptor)
- : memory_(memory), image_buffer_memory_(image_buffer_memory), memory_owner_(memory_owner),
- shape_(shape.b, shape.h, shape.w, 1, shape.c), descriptor_(descriptor)
-{
-}
-
-Tensor::Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory, const BHWDC &shape,
- const TensorDescriptor &descriptor)
- : memory_(memory), image_buffer_memory_(image_buffer_memory), memory_owner_(memory_owner),
- shape_(shape), descriptor_(descriptor)
-{
-}
-
-Tensor::Tensor(Tensor &&tensor)
- : memory_(tensor.memory_), image_buffer_memory_(tensor.image_buffer_memory_),
- memory_owner_(tensor.memory_owner_), shape_(tensor.shape_), descriptor_(tensor.descriptor_)
-{
- tensor.memory_ = nullptr;
- tensor.image_buffer_memory_ = nullptr;
-}
-
-Tensor &Tensor::operator=(Tensor &&tensor)
-{
- if (this != &tensor)
- {
- Release();
- std::swap(memory_, tensor.memory_);
- std::swap(image_buffer_memory_, tensor.image_buffer_memory_);
- std::swap(memory_owner_, tensor.memory_owner_);
- std::swap(shape_, tensor.shape_);
- std::swap(descriptor_, tensor.descriptor_);
- }
- return *this;
-}
-
-void Tensor::Release()
-{
- // image_buffer_memory_ is always owned by this object
- if (image_buffer_memory_)
- {
- clReleaseMemObject(image_buffer_memory_);
- image_buffer_memory_ = nullptr;
- }
- if (memory_owner_ && memory_)
- {
- clReleaseMemObject(memory_);
- memory_ = nullptr;
- }
-}
-
-absl::Status Tensor::GetGPUResources(const GPUObjectDescriptor *obj_ptr,
- GPUResourcesWithValue *resources) const
-{
- const auto *buffer_desc = dynamic_cast<const BufferDescriptor *>(obj_ptr);
- if (buffer_desc)
- {
- if (descriptor_.storage_type != TensorStorageType::BUFFER)
- {
- return absl::InvalidArgumentError("Tensor can be used with BufferDescriptor only with "
- "TensorStorageType::BUFFER.");
- }
- resources->buffers.push_back({"buffer", memory_});
- return absl::OkStatus();
- }
- const auto *tensor_desc = dynamic_cast<const TensorDescriptor *>(obj_ptr);
- if (!tensor_desc)
- {
- return absl::InvalidArgumentError("Expected TensorDescriptor on input.");
- }
- if (descriptor_.HasAxis(Axis::WIDTH))
- {
-
resources->ints.push_back({"width", Width()}); - resources->ints.push_back({"width_div2", Width() / 2}); - resources->ints.push_back({"width_div4", Width() / 4}); - resources->ints.push_back({"width_batched", Width() * Batch()}); - resources->ints.push_back({"width_batched_div2", Width() * Batch() / 2}); - resources->ints.push_back({"width_batched_div4", Width() * Batch() / 4}); - } - if (descriptor_.HasAxis(Axis::HEIGHT)) - { - resources->ints.push_back({"height", Height()}); - } - if (descriptor_.HasAxis(Axis::CHANNELS)) - { - resources->ints.push_back({"slices", Slices()}); - resources->ints.push_back({"channels", Channels()}); - } - if (descriptor_.HasAxis(Axis::BATCH)) - { - resources->ints.push_back({"batch", Batch()}); - } - if (descriptor_.HasAxis(Axis::DEPTH)) - { - resources->ints.push_back({"depth", Depth()}); - } - - if (descriptor_.storage_type == TensorStorageType::BUFFER) - { - resources->buffers.push_back({"buffer", memory_}); - } - else if (descriptor_.storage_type == TensorStorageType::TEXTURE_2D || - descriptor_.storage_type == TensorStorageType::SINGLE_TEXTURE_2D) - { - resources->images2d.push_back({"image2d", memory_}); - } - else if (descriptor_.storage_type == TensorStorageType::TEXTURE_ARRAY) - { - resources->image2d_arrays.push_back({"image2d_array", memory_}); - } - else if (descriptor_.storage_type == TensorStorageType::TEXTURE_3D) - { - resources->images3d.push_back({"image3d", memory_}); - } - else if (descriptor_.storage_type == TensorStorageType::IMAGE_BUFFER) - { - if (obj_ptr->GetAccess() == AccessType::READ) - { - resources->image_buffers.push_back({"image_buffer", image_buffer_memory_}); - } - else - { - resources->buffers.push_back({"buffer", memory_}); - } - } - - return absl::OkStatus(); -} - -int3 Tensor::GetFullTensorRegion() const -{ - switch (descriptor_.storage_type) - { - case TensorStorageType::BUFFER: - case TensorStorageType::TEXTURE_ARRAY: - case TensorStorageType::TEXTURE_3D: - case TensorStorageType::IMAGE_BUFFER: - return {shape_.w * shape_.b, shape_.h, shape_.d * Slices()}; - case TensorStorageType::TEXTURE_2D: - return {shape_.w * shape_.b * shape_.d, shape_.h * Slices(), 1}; - case TensorStorageType::SINGLE_TEXTURE_2D: - return {shape_.w * shape_.b * shape_.d, shape_.h, 1}; - case TensorStorageType::UNKNOWN: - return {-1, -1, -1}; - } - return {-1, -1, -1}; -} - -absl::Status Tensor::IsValid(const BHWC &shape) const -{ - if (shape.b != shape_.b) - { - return absl::InvalidArgumentError("Shape batch does not match tensor batch"); - } - if (shape.w != shape_.w) - { - return absl::InvalidArgumentError("Shape width does not match tensor width"); - } - if (shape.h != shape_.h) - { - return absl::InvalidArgumentError("Shape height does not match tensor height"); - } - if (shape.c != shape_.c) - { - return absl::InvalidArgumentError("Shape channels does not match tensor channels"); - } - return absl::OkStatus(); -} - -absl::Status Tensor::IsValid(const BHWDC &shape) const -{ - if (shape.b != shape_.b) - { - return absl::InvalidArgumentError("Shape batch does not match tensor batch"); - } - if (shape.w != shape_.w) - { - return absl::InvalidArgumentError("Shape width does not match tensor width"); - } - if (shape.h != shape_.h) - { - return absl::InvalidArgumentError("Shape height does not match tensor height"); - } - if (shape.d != shape_.d) - { - return absl::InvalidArgumentError("Shape depth does not match tensor depth"); - } - if (shape.c != shape_.c) - { - return absl::InvalidArgumentError("Shape channels does not match tensor channels"); 
- } - return absl::OkStatus(); -} - -int Tensor::GetAlignedChannels() const -{ - return descriptor_.storage_type == TensorStorageType::SINGLE_TEXTURE_2D ? shape_.c - : AlignByN(shape_.c, 4); -} - -uint64_t Tensor::GetMemorySizeInBytes() const -{ - const uint64_t flt_size = static_cast<uint64_t>(SizeOf(descriptor_.data_type)); - const uint64_t flt4_size = 4 * flt_size; - switch (descriptor_.storage_type) - { - case TensorStorageType::BUFFER: - case TensorStorageType::IMAGE_BUFFER: - case TensorStorageType::TEXTURE_ARRAY: - case TensorStorageType::TEXTURE_2D: - case TensorStorageType::TEXTURE_3D: - return flt4_size * shape_.b * shape_.w * shape_.h * shape_.d * Slices(); - case TensorStorageType::SINGLE_TEXTURE_2D: - return flt_size * shape_.w * shape_.h * shape_.c * shape_.b * shape_.d; - default: - return 0; - } -} - -cl_mem Tensor::GetMemoryPtr() const -{ - return descriptor_.storage_type == TensorStorageType::IMAGE_BUFFER ? image_buffer_memory_ - : memory_; -} - -cl_mem Tensor::GetMemoryPtrForWriting() const { return memory_; } - -absl::Status Tensor::WriteDataBHWDC(absl::Span<const float> in, CLCommandQueue *queue) -{ - void *data_ptr = nullptr; - const int aligned_channels = GetAlignedChannels(); - const int elements_count = shape_.b * shape_.w * shape_.h * shape_.d * aligned_channels; - - const size_t data_size = elements_count * SizeOf(descriptor_.data_type); - std::vector<float> data_f; - data_f.resize(elements_count); - data_ptr = data_f.data(); - DataFromBHWDC(in, shape_, descriptor_, absl::MakeSpan(data_f.data(), data_f.size())); - - switch (descriptor_.storage_type) - { - case TensorStorageType::BUFFER: - case TensorStorageType::IMAGE_BUFFER: - RETURN_IF_ERROR(queue->EnqueueWriteBuffer(memory_, data_size, data_ptr)); - break; - case TensorStorageType::TEXTURE_ARRAY: - case TensorStorageType::TEXTURE_2D: - case TensorStorageType::TEXTURE_3D: - case TensorStorageType::SINGLE_TEXTURE_2D: - RETURN_IF_ERROR(queue->EnqueueWriteImage(memory_, GetFullTensorRegion(), data_ptr)); - break; - default: - return absl::InternalError("Unsupported tensor storage type"); - } - - return absl::OkStatus(); -} - -absl::Status Tensor::WriteData(CLCommandQueue *queue, const TensorFloat32 &src) -{ - RETURN_IF_ERROR(IsValid(src.shape)); - return WriteDataBHWDC(absl::MakeConstSpan(src.data), queue); -} - -absl::Status Tensor::WriteData(CLCommandQueue *queue, - const InternalTensor<Linear, DataType::FLOAT32> &src) -{ - return WriteDataBHWDC(absl::MakeConstSpan(src.data), queue); -} - -absl::Status Tensor::WriteData(CLCommandQueue *queue, - const InternalTensor<HWC, DataType::FLOAT32> &src) -{ - return WriteDataBHWDC(absl::MakeConstSpan(src.data), queue); -} - -absl::Status Tensor::WriteData(CLCommandQueue *queue, const Tensor5DFloat32 &src) -{ - RETURN_IF_ERROR(IsValid(src.shape)); - return WriteDataBHWDC(absl::MakeConstSpan(src.data), queue); -} - -absl::Status Tensor::ReadDataBHWDC(absl::Span<float> out, CLCommandQueue *queue) const -{ - void *data_ptr = nullptr; - const int aligned_channels = GetAlignedChannels(); - const int elements_count = shape_.b * shape_.w * shape_.h * shape_.d * aligned_channels; - const size_t data_size = elements_count * SizeOf(descriptor_.data_type); - - std::vector<float> data_f; - data_f.resize(elements_count); - data_ptr = data_f.data(); - switch (descriptor_.storage_type) - { - case TensorStorageType::BUFFER: - case TensorStorageType::IMAGE_BUFFER: - RETURN_IF_ERROR(queue->EnqueueReadBuffer(memory_, data_size, data_ptr)); - break; - case TensorStorageType::TEXTURE_ARRAY: - 
case TensorStorageType::TEXTURE_2D: - case TensorStorageType::TEXTURE_3D: - case TensorStorageType::SINGLE_TEXTURE_2D: - RETURN_IF_ERROR(queue->EnqueueReadImage(memory_, GetFullTensorRegion(), data_ptr)); - break; - default: - return absl::InternalError("Unsupported tensor storage type"); - } - - if (descriptor_.data_type == DataType::FLOAT32) - { - DataToBHWDC(absl::MakeConstSpan(data_f.data(), data_f.size()), shape_, descriptor_, out); - } - - return absl::OkStatus(); -} - -absl::Status Tensor::ReadData(CLCommandQueue *queue, TensorFloat32 *dst) const -{ - RETURN_IF_ERROR(IsValid(dst->shape)); - return ReadDataBHWDC(absl::MakeSpan(dst->data), queue); -} - -absl::Status Tensor::ReadData(CLCommandQueue *queue, Tensor5DFloat32 *dst) const -{ - RETURN_IF_ERROR(IsValid(dst->shape)); - return ReadDataBHWDC(absl::MakeSpan(dst->data), queue); -} - -absl::Status Tensor::CreateFromDescriptor(const TensorDescriptor &desc, CLContext *context) -{ - shape_ = desc.shape; - descriptor_.data_type = desc.data_type; - descriptor_.storage_type = desc.storage_type; - descriptor_.layout = desc.layout; - memory_owner_ = true; - CLMemory memory; - uint8_t *data_ptr = desc.data.empty() ? nullptr : const_cast<unsigned char *>(desc.data.data()); - RETURN_IF_ERROR(AllocateTensorMemory(*context, shape_, descriptor_, data_ptr, &memory)); - memory_ = memory.Release(); - if (desc.storage_type == TensorStorageType::IMAGE_BUFFER) - { - RETURN_IF_ERROR(CreateImageBufferFromBuffer(*context, memory_, desc.data_type, - shape_.b * shape_.w * shape_.h * shape_.d * - DivideRoundUp(shape_.c, 4), - &image_buffer_memory_)); - } - return absl::OkStatus(); -} - -absl::Status CreateTensor(const CLContext &context, const BHWC &shape, - const TensorDescriptor &descriptor, Tensor *result) -{ - const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c); - return CreateTensor(context, shape5D, descriptor, nullptr, result); -} - -absl::Status CreateTensor(const CLContext &context, const BHWDC &shape, - const TensorDescriptor &descriptor, Tensor *result) -{ - return CreateTensor(context, shape, descriptor, nullptr, result); -} - -absl::Status CreateSharedTensor(const CLContext &context, cl_mem memory, const BHWC &shape, - const TensorDescriptor &descriptor, Tensor *result) -{ - const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c); - return CreateTensorShared(context, shape5D, descriptor, memory, result); -} - -absl::Status CreateSharedTensor(const CLContext &context, cl_mem memory, const BHWDC &shape, - const TensorDescriptor &descriptor, Tensor *result) -{ - return CreateTensorShared(context, shape, descriptor, memory, result); -} - -absl::Status AllocateTensorMemory(const CLContext &context, const BHWC &shape, - const TensorDescriptor &descriptor, CLMemory *result) -{ - const BHWDC shape5D(shape.b, shape.h, shape.w, 1, shape.c); - return AllocateTensorMemory(context, shape5D, descriptor, nullptr, result); -} - -absl::Status AllocateTensorMemory(const CLContext &context, const BHWDC &shape, - const TensorDescriptor &descriptor, CLMemory *result) -{ - return AllocateTensorMemory(context, shape, descriptor, nullptr, result); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/Tensor.h b/runtime/onert/backend/gpu_cl/open_cl/Tensor.h deleted file mode 100644 index b1930a423..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Tensor.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_H__ - -#include <cstdint> -#include <memory> - -#include "absl/types/span.h" -#include "ClCommandQueue.h" -#include "OpenclWrapper.h" -#include "ClContext.h" -#include "ClDevice.h" -#include "ClMemory.h" -#include "GpuObject.h" -#include "TensorType.h" -#include "Util.h" -#include "DataType.h" -#include "Shape.h" -#include "Status.h" -#include "InternalTensor.h" -#include "Types.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -class Tensor : public GPUObject -{ -public: - Tensor() : memory_(nullptr), image_buffer_memory_(nullptr), memory_owner_(true) {} - Tensor(cl_mem memory, bool memory_owner, const BHWC &shape, const TensorDescriptor &descriptor); - Tensor(cl_mem memory, bool memory_owner, const BHWDC &shape, const TensorDescriptor &descriptor); - Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory, const BHWC &shape, - const TensorDescriptor &descriptor); - Tensor(cl_mem memory, bool memory_owner, cl_mem image_buffer_memory, const BHWDC &shape, - const TensorDescriptor &descriptor); - - // Move only - Tensor(Tensor &&tensor); - Tensor &operator=(Tensor &&tensor); - Tensor(const Tensor &) = delete; - Tensor &operator=(const Tensor &) = delete; - - virtual ~Tensor() { Release(); } - - absl::Status GetGPUResources(const GPUObjectDescriptor *obj_ptr, - GPUResourcesWithValue *resources) const override; - - int Width() const { return shape_.w; } - int Height() const { return shape_.h; } - int Depth() const { return shape_.d; } - int Channels() const { return shape_.c; } - int Slices() const { return DivideRoundUp(shape_.c, 4); } - int Batch() const { return shape_.b; } - TensorDescriptor GetDescriptor() const { return descriptor_; } - DataType GetDataType() const { return descriptor_.data_type; } - TensorStorageType GetStorageType() const { return descriptor_.storage_type; } - - // for profiling and memory statistics - uint64_t GetMemorySizeInBytes() const; - - cl_mem GetMemoryPtr() const; - - // This function returns buffer memory ptr for IMAGE_BUFFER instead of image - // memory ptr. 
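-//
-// Hedged usage sketch (the read/write pairing is inferred from
-// Tensor::GetGPUResources in Tensor.cc, not stated explicitly in this header):
-//
-//   cl_mem read_mem = tensor.GetMemoryPtr();            // image view for reads
-//   cl_mem write_mem = tensor.GetMemoryPtrForWriting(); // raw buffer for writes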
- cl_mem GetMemoryPtrForWriting() const; - - absl::Status WriteData(CLCommandQueue *queue, const TensorFloat32 &src); - absl::Status WriteData(CLCommandQueue *queue, - const InternalTensor<Linear, DataType::FLOAT32> &src); - absl::Status WriteData(CLCommandQueue *queue, const InternalTensor<HWC, DataType::FLOAT32> &src); - - absl::Status WriteData(CLCommandQueue *queue, const Tensor5DFloat32 &src); - absl::Status ReadData(CLCommandQueue *queue, TensorFloat32 *dst) const; - absl::Status ReadData(CLCommandQueue *queue, Tensor5DFloat32 *dst) const; - - absl::Status CreateFromDescriptor(const TensorDescriptor &desc, CLContext *context); - -private: - absl::Status IsValid(const BHWC &shape) const; - absl::Status IsValid(const BHWDC &shape) const; - - int GetChannelsAlignment() const; - int GetAlignedChannels() const; - - absl::Status WriteDataBHWDC(absl::Span<const float> in, CLCommandQueue *queue); - absl::Status ReadDataBHWDC(absl::Span<float> out, CLCommandQueue *queue) const; - - int3 GetFullTensorRegion() const; - void Release(); - - cl_mem memory_; - cl_mem image_buffer_memory_; // for TensorStorageType::IMAGE_BUFFER only - bool memory_owner_; - BHWDC shape_; - TensorDescriptor descriptor_; -}; - -using TensorPtr = std::shared_ptr<Tensor>; - -absl::Status AllocateTensorMemory(const CLContext &context, const BHWC &shape, - const TensorDescriptor &descriptor, CLMemory *result); - -absl::Status AllocateTensorMemory(const CLContext &context, const BHWDC &shape, - const TensorDescriptor &descriptor, CLMemory *result); - -absl::Status CreateTensor(const CLContext &context, const BHWC &shape, - const TensorDescriptor &descriptor, Tensor *result); - -absl::Status CreateTensor(const CLContext &context, const BHWDC &shape, - const TensorDescriptor &descriptor, Tensor *result); - -absl::Status CreateSharedTensor(const CLContext &context, cl_mem memory, const BHWC &shape, - const TensorDescriptor &descriptor, Tensor *result); - -absl::Status CreateSharedTensor(const CLContext &context, cl_mem memory, const BHWDC &shape, - const TensorDescriptor &descriptor, Tensor *result); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/TensorType.cc b/runtime/onert/backend/gpu_cl/open_cl/TensorType.cc deleted file mode 100644 index 7ede38795..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/TensorType.cc +++ /dev/null @@ -1,1116 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "TensorType.h" - -#include "absl/strings/str_cat.h" -#include "absl/strings/substitute.h" -#include "Shape.h" -#include "DataType.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -std::string GetWriteImageFromDataType(DataType data_type) -{ - if (data_type == DataType::FLOAT32) - { - return "write_imagef"; - } - else if (data_type == DataType::FLOAT16) - { - return "write_imageh"; - } - else - { - throw std::runtime_error("Not supported data type"); - } -} - -} // namespace - -std::string TextureAddressModeToString(TextureAddressMode address_mode) -{ - switch (address_mode) - { - case TextureAddressMode::DONT_CARE: - return "smp_none"; - case TextureAddressMode::ZERO: - return "smp_zero"; - } - return ""; -} - -std::string ToString(TensorStorageType type) -{ - switch (type) - { - case TensorStorageType::UNKNOWN: - return "TensorStorageType::UNKNOWN"; - case TensorStorageType::BUFFER: - return "TensorStorageType::BUFFER"; - case TensorStorageType::TEXTURE_ARRAY: - return "TensorStorageType::TEXTURE_ARRAY"; - case TensorStorageType::TEXTURE_2D: - return "TensorStorageType::TEXTURE_2D"; - case TensorStorageType::TEXTURE_3D: - return "TensorStorageType::TEXTURE_3D"; - case TensorStorageType::SINGLE_TEXTURE_2D: - return "TensorStorageType::SINGLE_TEXTURE_2D"; - case TensorStorageType::IMAGE_BUFFER: - return "TensorStorageType::IMAGE_BUFFER"; - } - return ""; -} - -TensorDescriptor::TensorDescriptor(TensorDescriptor &&desc) - : GPUObjectDescriptor(std::move(desc)), data_type(desc.data_type), - storage_type(desc.storage_type), layout(desc.layout), shape(desc.shape), - data(std::move(desc.data)) -{ -} -TensorDescriptor &TensorDescriptor::operator=(TensorDescriptor &&desc) -{ - if (this != &desc) - { - std::swap(data_type, desc.data_type); - std::swap(storage_type, desc.storage_type); - std::swap(layout, desc.layout); - std::swap(shape, desc.shape); - data = std::move(desc.data); - GPUObjectDescriptor::operator=(std::move(desc)); - } - return *this; -} - -GPUResources TensorDescriptor::GetGPUResources() const -{ - GPUResources resources; - if (HasAxis(Axis::WIDTH)) - { - resources.ints.push_back("width"); - resources.ints.push_back("width_div2"); - resources.ints.push_back("width_div4"); - resources.ints.push_back("width_batched"); - resources.ints.push_back("width_batched_div2"); - resources.ints.push_back("width_batched_div4"); - } - if (HasAxis(Axis::HEIGHT)) - { - resources.ints.push_back("height"); - } - if (HasAxis(Axis::CHANNELS)) - { - resources.ints.push_back("slices"); - resources.ints.push_back("channels"); - } - if (HasAxis(Axis::BATCH)) - { - resources.ints.push_back("batch"); - } - if (HasAxis(Axis::DEPTH)) - { - resources.ints.push_back("depth"); - } - if (storage_type == TensorStorageType::BUFFER) - { - GPUBufferDescriptor desc; - desc.data_type = data_type; - desc.access_type = access_type_; - desc.element_size = 4; - auto it1 = state_vars_.find("ElementsX2"); - if (it1 != state_vars_.end() && it1->second == "true") - { - desc.element_size = 8; - } - auto it2 = state_vars_.find("ElementsX4"); - if (it2 != state_vars_.end() && it2->second == "true") - { - desc.element_size = 16; - } - resources.buffers.push_back({"buffer", desc}); - } - else if (storage_type == TensorStorageType::SINGLE_TEXTURE_2D || - storage_type == TensorStorageType::TEXTURE_2D) - { - GPUImage2DDescriptor desc; - desc.data_type = data_type; - desc.access_type = access_type_; - resources.images2d.push_back({"image2d", desc}); - } - else if (storage_type == 
TensorStorageType::TEXTURE_ARRAY) - { - GPUImage2DArrayDescriptor desc; - desc.data_type = data_type; - desc.access_type = access_type_; - resources.image2d_arrays.push_back({"image2d_array", desc}); - } - else if (storage_type == TensorStorageType::TEXTURE_3D) - { - GPUImage3DDescriptor desc; - desc.data_type = data_type; - desc.access_type = access_type_; - resources.images3d.push_back({"image3d", desc}); - } - else if (storage_type == TensorStorageType::IMAGE_BUFFER) - { - if (access_type_ == AccessType::READ) - { - GPUImageBufferDescriptor desc; - desc.data_type = data_type; - desc.access_type = access_type_; - resources.image_buffers.push_back({"image_buffer", desc}); - } - else - { - GPUBufferDescriptor desc; - desc.data_type = data_type; - desc.access_type = access_type_; - desc.element_size = 4; - resources.buffers.push_back({"buffer", desc}); - } - } - return resources; -} - -absl::Status TensorDescriptor::PerformSelector(const std::string &selector, - const std::vector<std::string> &args, - const std::vector<std::string> &template_args, - std::string *result) const -{ - if (selector == "Width") - { - *result = GetWidth(); - return absl::OkStatus(); - } - else if (selector == "Height") - { - *result = "height"; - return absl::OkStatus(); - } - else if (selector == "Slices") - { - *result = "slices"; - return absl::OkStatus(); - } - else if (selector == "SliceStride") - { - *result = GetSliceStride(); - return absl::OkStatus(); - } - else if (selector == "Channels") - { - *result = "channels"; - return absl::OkStatus(); - } - else if (selector == "Batch") - { - if (HasAxis(Axis::BATCH)) - { - *result = "batch"; - } - else - { - *result = "1"; - } - return absl::OkStatus(); - } - else if (selector == "Depth") - { - *result = "depth"; - return absl::OkStatus(); - } - else if (selector == "SetBatchRef") - { - if (args.size() != 1) - { - return absl::InvalidArgumentError("Unsupported arguments in SetBatchRef selector"); - } - state_vars_["batch_id"] = args[0]; - *result = ""; - return absl::OkStatus(); - } - else if (selector == "Read") - { - return PerformReadSelector(args, template_args, result); - } - else if (selector == "Write") - { - return PerformWriteSelector(args, result); - } - else if (selector == "WriteLinear") - { - return PerformWriteLinearSelector(args, result); - } - else if (selector == "GetAddress") - { - return PerformGetAddressSelector(args, result); - } - else if (selector == "GetPtrWithSliceOffset") - { - return PerformGetPtrWithSliceOffsetSelector(args, result); - } - else if (selector == "GetWHOffset") - { - return PerformGetWHOffsetSelector(args, result); - } - else if (selector == "GetHandle") - { - return PerformGetHandleSelector(args, result); - } - else - { - return absl::NotFoundError( - absl::StrCat("TensorDescriptor don't have selector with name - ", selector)); - } -} - -absl::Status TensorDescriptor::PerformReadSelector(const std::vector<std::string> &args, - const std::vector<std::string> &template_args, - std::string *result) const -{ - DataType read_as_type = data_type; - if (!template_args.empty()) - { - if (template_args.size() != 1) - { - return absl::NotFoundError("Unrecognized Read selector template arguments."); - } - else - { - RETURN_IF_ERROR(GetDataTypeFromTemplateArgs(template_args[0], &read_as_type)); - } - } - if (args.size() == 1) - { // function overload for 1D linear types. 
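- // (Illustrative expansions, assuming a float32 tensor; these strings are not
- // in the original source. For linear storage, tensor.Read(addr) in a kernel
- // template becomes
- //   BUFFER:       buffer[addr]
- //   IMAGE_BUFFER: read_imagef(image_buffer, addr)
- // while the multi-coordinate form handled below, e.g. Read(X, Y, S) on a
- // TEXTURE_2D tensor with HWC layout, becomes
- //   read_imagef(image2d, smp_none, (int2)((X), (Y) * slices + (S))).)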
-    if (storage_type == TensorStorageType::BUFFER ||
-        storage_type == TensorStorageType::IMAGE_BUFFER)
-    {
-      *result = Read(read_as_type, args[0]);
-      return absl::OkStatus();
-    }
-    else
-    {
-      return absl::InvalidArgumentError(
-        "Read selector with single argument can be used only with linear "
-        "storage types (BUFFER or IMAGE_BUFFER)");
-    }
-  }
-  std::string xc;
-  std::string yc;
-  std::string zc;
-  std::string sc;
-  std::string bc;
-  bool parsed = ParseCoordsFromArgs(args, 0, &xc, &yc, &zc, &sc, &bc);
-  if (args.size() < 2 || !parsed)
-  {
-    return absl::NotFoundError("Unrecognized Read selector");
-  }
-
-  *result = Read(read_as_type, GetGlobalAddressNoDeclaration(xc, yc, zc, sc, bc));
-  return absl::OkStatus();
-}
-
-absl::Status TensorDescriptor::GetLinkingContextFromWriteSelector(
-  const std::vector<std::string> &args, std::string *value_name, std::string *x_coord,
-  std::string *y_coord, std::string *s_coord) const
-{
-  std::string xc;
-  std::string yc;
-  std::string zc;
-  std::string sc;
-  std::string bc;
-  bool parsed = ParseCoordsFromArgs(args, 1, &xc, &yc, &zc, &sc, &bc);
-  if (args.size() < 2 || !parsed)
-  {
-    return absl::NotFoundError("Unrecognized Write selector");
-  }
-  *value_name = args[0];
-  if (HasAxis(Axis::BATCH) && !IsBatchedWidth())
-  {
-    *x_coord = absl::StrCat("((", xc, ") * batch + (", bc, "))");
-  }
-  else
-  {
-    *x_coord = absl::StrCat("(", xc, ")");
-  }
-  *y_coord = absl::StrCat("(", yc, ")");
-  *s_coord = absl::StrCat("(", sc, ")");
-  return absl::OkStatus();
-}
-
-absl::Status TensorDescriptor::PerformWriteSelector(const std::vector<std::string> &args,
-                                                    std::string *result) const
-{
-  std::string xc;
-  std::string yc;
-  std::string zc;
-  std::string sc;
-  std::string bc;
-  bool parsed = ParseCoordsFromArgs(args, 1, &xc, &yc, &zc, &sc, &bc);
-  if (args.size() < 2 || !parsed)
-  {
-    return absl::NotFoundError("Unrecognized Write selector");
-  }
-  *result = Write(args[0], GetGlobalAddressNoDeclaration(xc, yc, zc, sc, bc));
-  return absl::OkStatus();
-}
-
-absl::Status TensorDescriptor::PerformWriteLinearSelector(const std::vector<std::string> &args,
-                                                          std::string *result) const
-{
-  if (storage_type != TensorStorageType::BUFFER && storage_type != TensorStorageType::IMAGE_BUFFER)
-  {
-    return absl::InvalidArgumentError("WriteLinear selector can be used only with linear "
-                                      "storage types (BUFFER/IMAGE_BUFFER)");
-  }
-  if (args.size() != 2)
-  {
-    return absl::NotFoundError("Unrecognized WriteLinear selector");
-  }
-  *result = Write(args[0], "(" + args[1] + ")");
-  return absl::OkStatus();
-}
-
-std::string TensorDescriptor::Read(DataType read_as_type, const std::string &global_address) const
-{
-  const std::string read_as = read_as_type == DataType::FLOAT16 ? "read_imageh" : "read_imagef";
-  std::string image_type;
-  if (storage_type == TensorStorageType::TEXTURE_2D ||
-      storage_type == TensorStorageType::SINGLE_TEXTURE_2D)
-  {
-    image_type = "image2d";
-  }
-  else if (storage_type == TensorStorageType::TEXTURE_3D)
-  {
-    image_type = "image3d";
-  }
-  else if (storage_type == TensorStorageType::TEXTURE_ARRAY)
-  {
-    image_type = "image2d_array";
-  }
-  switch (storage_type)
-  {
-    case TensorStorageType::BUFFER:
-      if (read_as_type == data_type)
-      {
-        return absl::StrCat("buffer[", global_address, "]");
-      }
-      else
-      {
-        const std::string conversion =
-          read_as_type == DataType::FLOAT16 ? "convert_half4" : "convert_float4";
-        return absl::StrCat(conversion, "(buffer[", global_address, "])");
-      }
-    case TensorStorageType::TEXTURE_2D:
-    case TensorStorageType::TEXTURE_3D:
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-    case TensorStorageType::TEXTURE_ARRAY:
-      return absl::StrCat(read_as, "(", image_type,
-                          ", " + TextureAddressModeToString(ModeFromState()) + ", ",
-                          global_address, ")");
-    case TensorStorageType::IMAGE_BUFFER:
-      return absl::StrCat(read_as, "(image_buffer, ", global_address, ")");
-    case TensorStorageType::UNKNOWN:
-      return "";
-  }
-  return "";
-}
-
-std::string TensorDescriptor::Write(const std::string &var_name,
-                                    const std::string &global_address) const
-{
-  std::string image_type;
-  if (storage_type == TensorStorageType::TEXTURE_2D ||
-      storage_type == TensorStorageType::SINGLE_TEXTURE_2D)
-  {
-    image_type = "image2d";
-  }
-  else if (storage_type == TensorStorageType::TEXTURE_3D)
-  {
-    image_type = "image3d";
-  }
-  else if (storage_type == TensorStorageType::TEXTURE_ARRAY)
-  {
-    image_type = "image2d_array";
-  }
-  switch (storage_type)
-  {
-    case TensorStorageType::BUFFER:
-    case TensorStorageType::IMAGE_BUFFER:
-      return absl::StrCat("buffer[", global_address, "] = ", var_name, ";\n");
-    case TensorStorageType::TEXTURE_2D:
-    case TensorStorageType::TEXTURE_3D:
-    case TensorStorageType::SINGLE_TEXTURE_2D:
-    case TensorStorageType::TEXTURE_ARRAY:
-      return absl::StrCat(GetWriteImageFromDataType(data_type), "(", image_type, ", ",
-                          global_address, ", ", var_name, ");\n");
-    case TensorStorageType::UNKNOWN:
-      return "";
-  }
-  return "";
-}
-
-absl::Status TensorDescriptor::PerformGetAddressSelector(const std::vector<std::string> &args,
-                                                         std::string *result) const
-{
-  std::string xc;
-  std::string yc;
-  std::string zc;
-  std::string sc;
-  std::string bc;
-  bool parsed = ParseCoordsFromArgs(args, 1, &xc, &yc, &zc, &sc, &bc);
-  if (args.size() < 3 || !parsed)
-  {
-    return absl::NotFoundError("Unrecognized GetAddress selector");
-  }
-
-  *result = DeclareAddress(args[0], GetGlobalAddressNoDeclaration(xc, yc, zc, sc, bc));
-  return absl::OkStatus();
-}
-
-absl::Status
-TensorDescriptor::PerformGetPtrWithSliceOffsetSelector(const std::vector<std::string> &args,
-                                                       std::string *result) const
-{
-  if (storage_type != TensorStorageType::BUFFER)
-  {
-    return absl::InvalidArgumentError(
-      "GetPtrWithSliceOffset selector can be used only with BUFFER");
-  }
-  if (args.size() != 1)
-  {
-    return absl::NotFoundError(
-      absl::StrCat("GetPtrWithSliceOffset requires one argument (slice coordinate), but ",
-                   args.size(), " were passed"));
-  }
-  *result = absl::StrCat("buffer + ", args[0], " * ", GetSliceStride());
-  return absl::OkStatus();
-}
-
-absl::Status TensorDescriptor::PerformGetWHOffsetSelector(const std::vector<std::string> &args,
-                                                          std::string *result) const
-{
-  if (storage_type != TensorStorageType::BUFFER && storage_type != TensorStorageType::IMAGE_BUFFER)
-  {
-    return absl::InvalidArgumentError(
-      "GetWHOffset selector can be used only with BUFFER/IMAGE_BUFFER");
-  }
-  if (args.size() != 2)
-  {
-    return absl::NotFoundError(
-      absl::StrCat("GetWHOffset requires two arguments (X and Y coordinates), but ",
-                   args.size(), " were passed"));
-  }
-  if (HasAxis(Axis::BATCH) && !IsBatchedWidth())
-  {
-    auto it = state_vars_.find("batch_id");
-    std::string batch_id;
-    if (it == state_vars_.end())
-    {
-      return absl::NotFoundError(
-        "batch_id not found. It should be set up by the SetBatchRef() 
method"); - } - else - { - batch_id = it->second; - } - *result = absl::StrCat("((", args[1], ") * ", GetWidth(), " + (", args[0], ")) * batch + (", - batch_id, ")"); - } - else - { - *result = absl::StrCat("(", args[1], ") * ", GetWidth(), " + (", args[0], ")"); - } - return absl::OkStatus(); -} - -absl::Status TensorDescriptor::PerformGetHandleSelector(const std::vector<std::string> &args, - std::string *result) const -{ - if (!args.empty()) - { - return absl::NotFoundError( - absl::StrCat("GetHandle does not require arguments, but ", args.size(), " was passed")); - } - switch (storage_type) - { - case TensorStorageType::BUFFER: - *result = "buffer"; - return absl::OkStatus(); - case TensorStorageType::IMAGE_BUFFER: - if (access_type_ == AccessType::READ) - { - *result = "image_buffer"; - } - else - { - *result = "buffer"; - } - return absl::OkStatus(); - case TensorStorageType::TEXTURE_2D: - case TensorStorageType::SINGLE_TEXTURE_2D: - *result = "image2d"; - return absl::OkStatus(); - case TensorStorageType::TEXTURE_ARRAY: - *result = "image2d_array"; - return absl::OkStatus(); - case TensorStorageType::TEXTURE_3D: - *result = "image3d"; - return absl::OkStatus(); - case TensorStorageType::UNKNOWN: - return absl::UnavailableError("Unknown type"); - } - return absl::UnavailableError("Unknown type"); -} - -std::string TensorDescriptor::DeclareAddress(const std::string &var_name, - const std::string &address) const -{ - return absl::StrCat(StorageTypeToAddressType(), " ", var_name, " = ", address, ";"); -} - -std::string TensorDescriptor::StorageTypeToAddressType() const -{ - switch (storage_type) - { - case TensorStorageType::BUFFER: - case TensorStorageType::IMAGE_BUFFER: - return "int"; - case TensorStorageType::TEXTURE_2D: - case TensorStorageType::SINGLE_TEXTURE_2D: - return "int2"; - case TensorStorageType::TEXTURE_ARRAY: - case TensorStorageType::TEXTURE_3D: - return "int4"; - case TensorStorageType::UNKNOWN: - return ""; - } - return ""; -} - -std::string TensorDescriptor::GetGlobalAddressNoDeclarationWHS(const std::string &x, - const std::string &y, - const std::string &s) const -{ - switch (storage_type) - { - case TensorStorageType::BUFFER: - case TensorStorageType::IMAGE_BUFFER: - { - return absl::Substitute("((($2) * height + ($1)) * $3 + ($0))", x, y, s, GetWidth()); - } - case TensorStorageType::TEXTURE_2D: - return absl::Substitute("(int2)(($0), ($1) * slices + ($2))", x, y, s); - case TensorStorageType::SINGLE_TEXTURE_2D: - return absl::StrCat("(int2)(", x, ", ", y, ")"); - case TensorStorageType::TEXTURE_ARRAY: - case TensorStorageType::TEXTURE_3D: - return absl::StrCat("(int4)(", x, ", ", y, ", ", s, ", 0)"); - case TensorStorageType::UNKNOWN: - return "error"; - } - return "error"; -} - -std::string TensorDescriptor::GetGlobalAddressNoDeclarationWHSB(const std::string &x, - const std::string &y, - const std::string &s, - const std::string &b) const -{ - switch (storage_type) - { - case TensorStorageType::BUFFER: - case TensorStorageType::IMAGE_BUFFER: - return absl::Substitute("(((($3) * height + $2) * width + ($1)) * batch + ($0))", b, x, y, s); - case TensorStorageType::TEXTURE_2D: - return absl::Substitute("(int2)(($0) * batch + ($1), ($2) * slices + ($3))", x, b, y, s); - case TensorStorageType::SINGLE_TEXTURE_2D: - return absl::Substitute("(int2)(($0) * batch + ($1), ($2))", x, b, y); - case TensorStorageType::TEXTURE_ARRAY: - case TensorStorageType::TEXTURE_3D: - return absl::Substitute("(int4)(($0) * batch + ($1), ($2), ($3), 0)", x, b, y, s); - default: - throw 
std::runtime_error("Unknown storage type"); - } -} - -std::string TensorDescriptor::GetGlobalAddressNoDeclarationWHDS(const std::string &x, - const std::string &y, - const std::string &z, - const std::string &s) const -{ - switch (storage_type) - { - case TensorStorageType::BUFFER: - case TensorStorageType::IMAGE_BUFFER: - { - return absl::Substitute("(((($3) * slices + ($2)) * height + ($1)) * $4 + ($0))", x, y, s, z, - GetWidth()); - } - case TensorStorageType::TEXTURE_2D: - return absl::Substitute("(int2)(($0) * depth + ($1), ($2) * slices + ($3))", x, z, y, s); - case TensorStorageType::SINGLE_TEXTURE_2D: - return absl::Substitute("(int2)(($0) * depth + ($1), ($2))", x, z, y); - case TensorStorageType::TEXTURE_ARRAY: - case TensorStorageType::TEXTURE_3D: - return absl::Substitute("(int4)(($0), ($1), ($2) * slices + ($3), 0)", x, y, z, s); - case TensorStorageType::UNKNOWN: - return "error"; - } - return "error"; -} - -std::string TensorDescriptor::GetGlobalAddressNoDeclarationWHDSB(const std::string &x, - const std::string &y, - const std::string &z, - const std::string &s, - const std::string &b) const -{ - switch (storage_type) - { - case TensorStorageType::BUFFER: - case TensorStorageType::IMAGE_BUFFER: - return absl::Substitute("((((($4) * slices + ($3)) * height + $2) * width + ($1)) * batch + " - "($0))", - b, x, y, s, z); - case TensorStorageType::TEXTURE_2D: - return absl::Substitute("(int2)((($0) * batch + ($1)) * depth + ($2), ($3) * slices + ($4))", - x, b, z, y, s); - case TensorStorageType::SINGLE_TEXTURE_2D: - return absl::Substitute("(int2)((($0) * batch + ($1)) * depth + ($2), ($3))", x, b, z, y); - case TensorStorageType::TEXTURE_ARRAY: - case TensorStorageType::TEXTURE_3D: - return absl::Substitute("(int4)(($0) * batch + ($1), ($2), ($3) * slices + ($4), 0)", x, b, y, - z, s); - default: - throw std::runtime_error("Unknown storage type"); - } -} - -std::string TensorDescriptor::GetGlobalAddressNoDeclaration(const std::string &xc, - const std::string &yc, - const std::string &zc, - const std::string &sc, - const std::string &bc) const -{ - if (layout == Layout::HWC || (IsBatchedWidth() && layout == Layout::BHWC)) - { - return GetGlobalAddressNoDeclarationWHS(xc, yc, sc); - } - else if (layout == Layout::BHWC) - { - return GetGlobalAddressNoDeclarationWHSB(xc, yc, sc, bc); - } - else if (layout == Layout::HWDC || (IsBatchedWidth() && layout == Layout::BHWDC)) - { - return GetGlobalAddressNoDeclarationWHDS(xc, yc, zc, sc); - } - else if (layout == Layout::BHWDC) - { - return GetGlobalAddressNoDeclarationWHDSB(xc, yc, zc, sc, bc); - } - else - { - throw std::runtime_error("Unsupported layout"); - } -} - -absl::Status TensorDescriptor::GetDataTypeFromTemplateArgs(const std::string &template_arg, - DataType *result) const -{ - std::string read_type = template_arg; - if (read_type == "FLT" || read_type == "ACCUM_FLT") - { - auto it = state_vars_.find(read_type); - if (it == state_vars_.end()) - { - return absl::UnavailableError( - absl::StrCat("Read selector template argument ", read_type, " uninitialized.")); - } - else - { - read_type = it->second; - } - } - - if (read_type == "half") - { - *result = DataType::FLOAT16; - } - else if (read_type == "float") - { - *result = DataType::FLOAT32; - } - else - { - return absl::NotFoundError( - absl::StrCat("Unrecognized Read selector template argument - ", read_type)); - } - return absl::OkStatus(); -} - -bool TensorDescriptor::HasAxis(Axis axis) const -{ - if (axis == Axis::WIDTH || axis == Axis::HEIGHT || axis == Axis::CHANNELS) 
- { - return true; - } - if (axis == Axis::BATCH && (layout == Layout::BHWC || layout == Layout::BHWDC)) - { - return true; - } - if (axis == Axis::DEPTH && (layout == Layout::HWDC || layout == Layout::BHWDC)) - { - return true; - } - return false; -} - -void TensorDescriptor::SetTextureAddressMode(TextureAddressMode mode) -{ - if (mode == TextureAddressMode::ZERO) - { - state_vars_["TextureMode"] = "ZERO"; - } - else - { - state_vars_["TextureMode"] = "DONT_CARE"; - } -} - -bool TensorDescriptor::ParseCoordsFromArgs(const std::vector<std::string> &args, int offset, - std::string *xc, std::string *yc, std::string *zc, - std::string *sc, std::string *bc) const -{ - if (HasAxis(Axis::WIDTH)) - { - if ((size_t)offset >= args.size()) - return false; - *xc = args[offset++]; - } - if (HasAxis(Axis::HEIGHT)) - { - if ((size_t)offset >= args.size()) - return false; - *yc = args[offset++]; - } - if (HasAxis(Axis::DEPTH)) - { - if ((size_t)offset >= args.size()) - return false; - *zc = args[offset++]; - } - if (HasAxis(Axis::CHANNELS)) - { - if ((size_t)offset >= args.size()) - { - auto it = state_vars_.find("slice_id"); - if (it == state_vars_.end()) - { - return false; - } - else - { - *sc = it->second; - } - } - else - { - *sc = args[offset++]; - } - } - if (HasAxis(Axis::BATCH) && !IsBatchedWidth()) - { - if ((size_t)offset >= args.size()) - { - auto it = state_vars_.find("batch_id"); - if (it == state_vars_.end()) - { - return false; - } - else - { - *bc = it->second; - } - } - else - { - *bc = args[offset++]; - } - } - return true; -} - -bool TensorDescriptor::IsBatchedWidth() const -{ - auto it = state_vars_.find("BatchedWidth"); - return it != state_vars_.end() && it->second == "true"; -} - -std::string TensorDescriptor::GetWidth() const -{ - std::string div; - auto it1 = state_vars_.find("ElementsX2"); - if (it1 != state_vars_.end() && it1->second == "true") - { - div = "_div2"; - } - auto it2 = state_vars_.find("ElementsX4"); - if (it2 != state_vars_.end() && it2->second == "true") - { - div = "_div4"; - } - auto it = state_vars_.find("BatchedWidth"); - if (it != state_vars_.end() && it->second == "true") - { - return "width_batched" + div; - } - else - { - return "width" + div; - } -} - -std::string TensorDescriptor::GetSliceStride() const -{ - if (IsBatchedWidth()) - { - return GetWidth() + " * height"; - } - else - { - if (HasAxis(Axis::BATCH)) - { - return GetWidth() + " * height * batch"; - } - else - { - return GetWidth() + " * height"; - } - } -} - -TextureAddressMode TensorDescriptor::ModeFromState() const -{ - auto it = state_vars_.find("TextureMode"); - if (it != state_vars_.end()) - { - if (it->second == "ZERO") - { - return TextureAddressMode::ZERO; - } - else - { - return TextureAddressMode::DONT_CARE; - } - } - else - { - return TextureAddressMode::DONT_CARE; - } -} - -void TensorDescriptor::UploadData(const InternalTensor<HWC, DataType::FLOAT32> &src) -{ - shape = BHWDC(1, src.shape.h, src.shape.w, 1, src.shape.c); - UploadData(absl::MakeConstSpan(src.data)); -} - -void TensorDescriptor::UploadData(const InternalTensor<Linear, DataType::FLOAT32> &src) -{ - shape = BHWDC(1, 1, 1, 1, src.shape.v); - UploadData(absl::MakeConstSpan(src.data)); -} - -void TensorDescriptor::UploadData(absl::Span<const float> src) -{ - int aligned_channels = - storage_type == TensorStorageType::SINGLE_TEXTURE_2D ? 
shape.c : AlignByN(shape.c, 4); - int elements_count = shape.b * shape.w * shape.h * shape.d * aligned_channels; - data.resize(elements_count * SizeOf(data_type)); - if (data_type == DataType::FLOAT32) - { - float *gpu_data = reinterpret_cast<float *>(data.data()); - DataFromBHWDC(src, shape, *this, absl::MakeSpan(gpu_data, elements_count)); - } -} - -bool TensorDescriptor::SupportsZeroClamp(const Axis &axis) const -{ - switch (storage_type) - { - case TensorStorageType::UNKNOWN: - return false; - case TensorStorageType::BUFFER: - case TensorStorageType::IMAGE_BUFFER: - return false; - case TensorStorageType::TEXTURE_ARRAY: - case TensorStorageType::TEXTURE_2D: - case TensorStorageType::SINGLE_TEXTURE_2D: - return axis == Axis::WIDTH || axis == Axis::HEIGHT; - case TensorStorageType::TEXTURE_3D: - return axis == Axis::WIDTH || axis == Axis::HEIGHT || axis == Axis::DEPTH; - } - return false; -} - -bool TensorDescriptor::CanReadOutOfBorder(const Axis &) const -{ - switch (storage_type) - { - case TensorStorageType::UNKNOWN: - return false; - case TensorStorageType::BUFFER: - return false; - case TensorStorageType::IMAGE_BUFFER: - case TensorStorageType::TEXTURE_2D: - case TensorStorageType::TEXTURE_3D: - case TensorStorageType::SINGLE_TEXTURE_2D: - case TensorStorageType::TEXTURE_ARRAY: - return true; - } - return false; -} - -bool TensorDescriptor::IsLinear() const -{ - return storage_type == TensorStorageType::BUFFER || - storage_type == TensorStorageType::IMAGE_BUFFER; -} - -bool TensorDescriptor::ReturnsZeroForNegOneRead() const -{ - return storage_type == TensorStorageType::IMAGE_BUFFER; -} - -namespace -{ -int GetLinearIndex(const TensorDescriptor &desc, const BHWDC &shape, int b, int x, int y, int d, - int s, int sub_c) -{ - const int slices = DivideRoundUp(shape.c, 4); - switch (desc.storage_type) - { - case TensorStorageType::BUFFER: - case TensorStorageType::IMAGE_BUFFER: - case TensorStorageType::TEXTURE_ARRAY: - case TensorStorageType::TEXTURE_3D: - return ((((d * slices + s) * shape.h + y) * shape.w + x) * shape.b + b) * 4 + - sub_c; // DSHWBC4 - case TensorStorageType::TEXTURE_2D: - return ((((y * slices + s) * shape.w + x) * shape.b + b) * shape.d + d) * 4 + - sub_c; // HSWBDC4 - case TensorStorageType::SINGLE_TEXTURE_2D: - return (((y * shape.w + x) * shape.b + b) * shape.d + d) * shape.c + sub_c; // HWBDC - default: - return -1; - } - return -1; -} - -int GetChannelsAlignment(const TensorDescriptor &desc, const BHWDC &shape) -{ - return desc.storage_type == TensorStorageType::SINGLE_TEXTURE_2D ? 
shape.c : 4; -} -} // namespace - -template <typename T> -void DataFromBHWDC(absl::Span<const float> src, const BHWDC &shape, const TensorDescriptor &desc, - absl::Span<T> dst) -{ - const int channels_alignment = GetChannelsAlignment(desc, shape); - const int slices = DivideRoundUp(shape.c, 4); - for (int b = 0; b < shape.b; ++b) - { - for (int s = 0; s < slices; ++s) - { - for (int y = 0; y < shape.h; ++y) - { - for (int x = 0; x < shape.w; ++x) - { - for (int d = 0; d < shape.d; ++d) - { - for (int c = 0; c < channels_alignment; ++c) - { - float value; - if (s * 4 + c < shape.c) - { - const int cpu_index = shape.LinearIndex({b, y, x, d, s * 4 + c}); - value = src[cpu_index]; - } - else - { - value = 0.0f; - } - int gpu_index = GetLinearIndex(desc, shape, b, x, y, d, s, c); - dst[gpu_index] = value; - } - } - } - } - } - } -} - -template void DataFromBHWDC<float>(absl::Span<const float> src, const BHWDC &shape, - const TensorDescriptor &desc, absl::Span<float> dst); - -template <typename T> -void DataToBHWDC(absl::Span<const T> src, const BHWDC &shape, const TensorDescriptor &desc, - absl::Span<float> dst) -{ - const int channels_alignment = GetChannelsAlignment(desc, shape); - const int slices = DivideRoundUp(shape.c, 4); - for (int b = 0; b < shape.b; ++b) - { - for (int s = 0; s < slices; ++s) - { - for (int y = 0; y < shape.h; ++y) - { - for (int x = 0; x < shape.w; ++x) - { - for (int d = 0; d < shape.d; ++d) - { - for (int c = 0; c < channels_alignment; ++c) - { - if (s * 4 + c >= shape.c) - { - continue; - } - int cpu_index = shape.LinearIndex({b, y, x, d, s * 4 + c}); - int gpu_index = GetLinearIndex(desc, shape, b, x, y, d, s, c); - dst[cpu_index] = src[gpu_index]; - } - } - } - } - } - } -} - -template void DataToBHWDC<float>(absl::Span<const float> src, const BHWDC &shape, - const TensorDescriptor &desc, absl::Span<float> dst); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/TensorType.h b/runtime/onert/backend/gpu_cl/open_cl/TensorType.h deleted file mode 100644 index 45523783f..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/TensorType.h +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_TYPE_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_TYPE_H__ - -#include <cstddef> -#include <string> - -#include "absl/types/span.h" -#include "GpuObject.h" -#include "DataType.h" -#include "InternalTensor.h" -#include "Shape.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -enum class TextureAddressMode -{ - DONT_CARE, // translated to CLK_ADDRESS_NONE - ZERO, // translated to CLK_ADDRESS_CLAMP -}; - -std::string TextureAddressModeToString(TextureAddressMode address_mode); - -enum class TensorStorageType -{ - UNKNOWN, - BUFFER, - IMAGE_BUFFER, - TEXTURE_2D, - TEXTURE_3D, - TEXTURE_ARRAY, - SINGLE_TEXTURE_2D -}; - -struct TensorDescriptor : public GPUObjectDescriptor -{ - TensorDescriptor() = default; - TensorDescriptor(DataType dt, TensorStorageType st, Layout l) - : data_type(dt), storage_type(st), layout(l) - { - } - - TensorDescriptor(const TensorDescriptor &) = default; - TensorDescriptor &operator=(const TensorDescriptor &) = default; - TensorDescriptor(TensorDescriptor &&desc); - TensorDescriptor &operator=(TensorDescriptor &&desc); - - bool operator==(const TensorDescriptor &d) const - { - return data_type == d.data_type && storage_type == d.storage_type && layout == d.layout; - } - - bool operator!=(const TensorDescriptor &d) const { return !(*this == d); } - - absl::Status PerformSelector(const std::string &selector, const std::vector<std::string> &args, - const std::vector<std::string> &template_args, - std::string *result) const override; - - GPUResources GetGPUResources() const override; - - absl::Status CreateGPUObject(CLContext *context, GPUObjectPtr *result) const override; - void Release() override { data.clear(); } - - bool HasAxis(Axis axis) const; - void SetTextureAddressMode(TextureAddressMode mode); - - absl::Status GetLinkingContextFromWriteSelector(const std::vector<std::string> &args, - std::string *value_name, std::string *x_coord, - std::string *y_coord, std::string *s_coord) const; - - void UploadData(const InternalTensor<HWC, DataType::FLOAT32> &src); - void UploadData(const InternalTensor<Linear, DataType::FLOAT32> &src); - - bool SupportsZeroClamp(const Axis &axis) const; - bool CanReadOutOfBorder(const Axis &axis) const; - bool IsLinear() const; - - // applicable only for types that: IsLinear -> true. - // In this case for address we have 1d component - addr (int) - // If for addr == -1 this linear storage type returns FLT4(0.0), this function - // returns true, otherwise false - bool ReturnsZeroForNegOneRead() const; - - DataType data_type = DataType::UNKNOWN; - TensorStorageType storage_type = TensorStorageType::UNKNOWN; - // This field describes logical layout, actual(physical) GPU layout can be - // totally different. 
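- // (Worked example, not in the original header: for the linear storage types
- // "physically different" concretely means DSHWBC4 order, channels packed four
- // to a slice. GetLinearIndex() in TensorType.cc above computes, for the
- // BUFFER/IMAGE_BUFFER case,
- //   idx = ((((d * slices + s) * h + y) * w + x) * batch + b) * 4 + sub_c
- // so for a 1x1x1x8 tensor (slices == 2) logical channel 5 splits into
- // s == 1, sub_c == 1 and lands at idx == (0 * 2 + 1) * 4 + 1 == 5.)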
- Layout layout = Layout::UNKNOWN; // Supported layouts is HWC, BHWC, HWDC, BHWDC - - // optional - BHWDC shape; - std::vector<uint8_t> data; - -private: - absl::Status PerformReadSelector(const std::vector<std::string> &args, - const std::vector<std::string> &template_args, - std::string *result) const; - - absl::Status PerformGetAddressSelector(const std::vector<std::string> &args, - std::string *result) const; - - absl::Status PerformGetPtrWithSliceOffsetSelector(const std::vector<std::string> &args, - std::string *result) const; - - absl::Status PerformGetWHOffsetSelector(const std::vector<std::string> &args, - std::string *result) const; - - absl::Status PerformGetHandleSelector(const std::vector<std::string> &args, - std::string *result) const; - - std::string DeclareAddress(const std::string &var_name, const std::string &address) const; - - std::string StorageTypeToAddressType() const; - - absl::Status PerformWriteSelector(const std::vector<std::string> &args, - std::string *result) const; - - absl::Status PerformWriteLinearSelector(const std::vector<std::string> &args, - std::string *result) const; - - std::string Read(DataType read_as_type, const std::string &global_address) const; - std::string Write(const std::string &var_name, const std::string &global_address) const; - - bool IsBatchedWidth() const; - - std::string GetWidth() const; - std::string GetSliceStride() const; - - TextureAddressMode ModeFromState() const; - - absl::Status GetDataTypeFromTemplateArgs(const std::string &template_arg, DataType *result) const; - - std::string GetGlobalAddressNoDeclarationWHS(const std::string &x, const std::string &y, - const std::string &s) const; - std::string GetGlobalAddressNoDeclarationWHSB(const std::string &x, const std::string &y, - const std::string &s, const std::string &b) const; - std::string GetGlobalAddressNoDeclarationWHDS(const std::string &x, const std::string &y, - const std::string &z, const std::string &s) const; - std::string GetGlobalAddressNoDeclarationWHDSB(const std::string &x, const std::string &y, - const std::string &z, const std::string &s, - const std::string &b) const; - std::string GetGlobalAddressNoDeclaration(const std::string &xc, const std::string &yc, - const std::string &zc, const std::string &sc, - const std::string &bc) const; - - bool ParseCoordsFromArgs(const std::vector<std::string> &args, int offset, std::string *xc, - std::string *yc, std::string *zc, std::string *sc, - std::string *bc) const; - - void UploadData(absl::Span<const float> src); -}; - -template <typename T> -void DataFromBHWDC(absl::Span<const float> src, const BHWDC &shape, const TensorDescriptor &desc, - absl::Span<T> dst); - -template <typename T> -void DataToBHWDC(absl::Span<const T> src, const BHWDC &shape, const TensorDescriptor &desc, - absl::Span<float> dst); - -std::string ToString(TensorStorageType type); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_TYPE_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/TensorTypeUtil.cc b/runtime/onert/backend/gpu_cl/open_cl/TensorTypeUtil.cc deleted file mode 100644 index b1f8309e4..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/TensorTypeUtil.cc +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "TensorTypeUtil.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -ObjectType ToObjectType(TensorStorageType type) -{ - switch (type) - { - case TensorStorageType::IMAGE_BUFFER: - case TensorStorageType::BUFFER: - return ObjectType::OPENCL_BUFFER; - case TensorStorageType::SINGLE_TEXTURE_2D: - case TensorStorageType::TEXTURE_2D: - case TensorStorageType::TEXTURE_ARRAY: - case TensorStorageType::TEXTURE_3D: - return ObjectType::OPENCL_TEXTURE; - default: - return ObjectType::UNKNOWN; - } -} - -DataLayout ToDataLayout(TensorStorageType type) -{ - switch (type) - { - case TensorStorageType::BUFFER: - return DataLayout::DHWC4; - case TensorStorageType::IMAGE_BUFFER: - return DataLayout::DHWC4; - case TensorStorageType::SINGLE_TEXTURE_2D: - return DataLayout::BHWC; - case TensorStorageType::TEXTURE_2D: - return DataLayout::HDWC4; - case TensorStorageType::TEXTURE_ARRAY: - return DataLayout::DHWC4; - case TensorStorageType::TEXTURE_3D: - return DataLayout::DHWC4; - default: - return DataLayout::UNKNOWN; - } -} - -TensorStorageType ToTensorStorageType(ObjectType object_type, DataLayout data_layout) -{ - switch (object_type) - { - case ObjectType::OPENCL_BUFFER: - return TensorStorageType::BUFFER; - case ObjectType::OPENCL_TEXTURE: - switch (data_layout) - { - case DataLayout::BHWC: - return TensorStorageType::SINGLE_TEXTURE_2D; - case DataLayout::DHWC4: - return TensorStorageType::TEXTURE_ARRAY; - case DataLayout::HDWC4: - return TensorStorageType::TEXTURE_2D; - default: - return TensorStorageType::UNKNOWN; - } - default: - return TensorStorageType::UNKNOWN; - } -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/TensorTypeUtil.h b/runtime/onert/backend/gpu_cl/open_cl/TensorTypeUtil.h deleted file mode 100644 index f56fc3d83..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/TensorTypeUtil.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_TYPE_UTIL_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_TYPE_UTIL_H__ - -#include "Api.h" -#include "TensorType.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -ObjectType ToObjectType(TensorStorageType type); - -DataLayout ToDataLayout(TensorStorageType type); - -TensorStorageType ToTensorStorageType(ObjectType object_type, DataLayout data_layout); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_TENSOR_TYPE_UTIL_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/Texture2d.cc b/runtime/onert/backend/gpu_cl/open_cl/Texture2d.cc deleted file mode 100644 index ae25e85d0..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Texture2d.cc +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Texture2d.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -// Creates new 4-channel 2D texture with cl_channel_type elements -absl::Status CreateTexture2D(int width, int height, DataType type, void *data, CLContext *context, - Texture2D *result) -{ - cl_mem texture; - cl_channel_type channel_type = DataTypeToChannelType(type); - RETURN_IF_ERROR( - CreateRGBAImage2D(context->context(), width, height, channel_type, data, &texture)); - *result = Texture2D(texture, width, height, channel_type); - - return absl::OkStatus(); -} -} // namespace - -Texture2DDescriptor::Texture2DDescriptor(Texture2DDescriptor &&desc) - : GPUObjectDescriptor(std::move(desc)), element_type(desc.element_type), - normalized(desc.normalized), normalized_type(desc.normalized_type), size(desc.size), - data(std::move(desc.data)) -{ -} - -Texture2DDescriptor &Texture2DDescriptor::operator=(Texture2DDescriptor &&desc) -{ - if (this != &desc) - { - std::swap(element_type, desc.element_type); - std::swap(normalized, desc.normalized); - std::swap(normalized_type, desc.normalized_type); - std::swap(size, desc.size); - data = std::move(desc.data); - GPUObjectDescriptor::operator=(std::move(desc)); - } - return *this; -} - -void Texture2DDescriptor::Release() { data.clear(); } - -GPUResources Texture2DDescriptor::GetGPUResources() const -{ - GPUResources resources; - GPUImage2DDescriptor desc; - desc.data_type = element_type; - desc.access_type = access_type_; - resources.images2d.push_back({"tex2d", desc}); - return resources; -} - -absl::Status Texture2DDescriptor::PerformSelector(const std::string &selector, - const std::vector<std::string> &args, - const std::vector<std::string> &, - std::string *result) const -{ - if (selector == "Read") - { - return PerformReadSelector(args, result); - } - else - { - return absl::NotFoundError( - absl::StrCat("Texture2DDescriptor don't have selector with name - ", selector)); - } -} - -absl::Status 
Texture2DDescriptor::PerformReadSelector(const std::vector<std::string> &args,
-                                         std::string *result) const
-{
-  if (args.size() != 2)
-  {
-    return absl::NotFoundError(
-      absl::StrCat("Texture2DDescriptor Read requires two arguments, but ", args.size(),
-                   " were passed"));
-  }
-  std::string read;
-  switch (element_type)
-  {
-    case DataType::FLOAT32:
-      read = "read_imagef";
-      break;
-    case DataType::FLOAT16:
-      read = "read_imageh";
-      break;
-    case DataType::INT8:
-    case DataType::INT16:
-    case DataType::INT32:
-      if (normalized)
-      {
-        read = normalized_type == DataType::FLOAT16 ? "read_imageh" : "read_imagef";
-      }
-      else
-      {
-        read = "read_imagei";
-      }
-      break;
-    case DataType::UINT8:
-    case DataType::UINT16:
-    case DataType::UINT32:
-      if (normalized)
-      {
-        read = normalized_type == DataType::FLOAT16 ? "read_imageh" : "read_imagef";
-      }
-      else
-      {
-        read = "read_imageui";
-      }
-      break;
-    default:
-      read = "unknown_type";
-      break;
-  }
-  *result = absl::StrCat(read, "(tex2d, smp_none, (int2)(", args[0], ", " + args[1] + "))");
-  return absl::OkStatus();
-}
-
-absl::Status Texture2DDescriptor::CreateGPUObject(CLContext *context, GPUObjectPtr *result) const
-{
-  Texture2D gpu_texture;
-  RETURN_IF_ERROR(gpu_texture.CreateFromTexture2DDescriptor(*this, context));
-  *result = absl::make_unique<Texture2D>(std::move(gpu_texture));
-  return absl::OkStatus();
-}
-
-Texture2D::Texture2D(cl_mem texture, int width, int height, cl_channel_type type)
-  : texture_(texture), width_(width), height_(height), channel_type_(type)
-{
-}
-
-Texture2D::Texture2D(Texture2D &&texture)
-  : texture_(texture.texture_), width_(texture.width_), height_(texture.height_),
-    channel_type_(texture.channel_type_)
-{
-  texture.texture_ = nullptr;
-  texture.width_ = 0;
-  texture.height_ = 0;
-}
-
-Texture2D &Texture2D::operator=(Texture2D &&texture)
-{
-  if (this != &texture)
-  {
-    Release();
-    std::swap(channel_type_, texture.channel_type_);
-    std::swap(width_, texture.width_);
-    std::swap(height_, texture.height_);
-    std::swap(texture_, texture.texture_);
-  }
-  return *this;
-}
-
-void Texture2D::Release()
-{
-  if (texture_)
-  {
-    clReleaseMemObject(texture_);
-    texture_ = nullptr;
-    width_ = 0;
-    height_ = 0;
-  }
-}
-
-absl::Status Texture2D::GetGPUResources(const GPUObjectDescriptor *obj_ptr,
-                                        GPUResourcesWithValue *resources) const
-{
-  const auto *texture_desc = dynamic_cast<const Texture2DDescriptor *>(obj_ptr);
-  if (!texture_desc)
-  {
-    return absl::InvalidArgumentError("Expected Texture2DDescriptor on input.");
-  }
-
-  resources->images2d.push_back({"tex2d", texture_});
-  return absl::OkStatus();
-}
-
-absl::Status Texture2D::CreateFromTexture2DDescriptor(const Texture2DDescriptor &desc,
-                                                      CLContext *context)
-{
-  width_ = desc.size.x;
-  height_ = desc.size.y;
-  channel_type_ = DataTypeToChannelType(desc.element_type, desc.normalized);
-  uint8_t *data_ptr = desc.data.empty() ? 
nullptr : const_cast<unsigned char *>(desc.data.data()); - return CreateRGBAImage2D(context->context(), desc.size.x, desc.size.y, channel_type_, data_ptr, - &texture_); -} - -// Creates new 4-channel 2D texture with f32 elements -absl::Status CreateTexture2DRGBA32F(int width, int height, CLContext *context, Texture2D *result) -{ - return CreateTexture2D(width, height, DataType::FLOAT32, nullptr, context, result); -} - -// Creates new 4-channel 2D texture with f16 elements -absl::Status CreateTexture2DRGBA16F(int width, int height, CLContext *context, Texture2D *result) -{ - return CreateTexture2D(width, height, DataType::FLOAT16, nullptr, context, result); -} - -absl::Status CreateTexture2DRGBA(DataType type, int width, int height, CLContext *context, - Texture2D *result) -{ - return CreateTexture2D(width, height, type, nullptr, context, result); -} - -absl::Status CreateTexture2DRGBA(DataType type, int width, int height, void *data, - CLContext *context, Texture2D *result) -{ - return CreateTexture2D(width, height, type, data, context, result); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/Texture2d.h b/runtime/onert/backend/gpu_cl/open_cl/Texture2d.h deleted file mode 100644 index 264507079..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Texture2d.h +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_TEXTURE2D_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_TEXTURE2D_H__ - -#include "absl/strings/str_cat.h" -#include "absl/types/span.h" -#include "ClCommandQueue.h" -#include "ClContext.h" -#include "GpuObject.h" -#include "OpenclWrapper.h" -#include "TensorType.h" -#include "Util.h" -#include "DataType.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -struct Texture2DDescriptor : public GPUObjectDescriptor -{ - DataType element_type; - bool normalized = false; // used with INT data types, if normalized, we read - // in kernel float data. 
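- // (Clarifying note: when normalized is true, integer textures are sampled
- // with read_imagef/read_imageh, so kernels see float values rather than raw
- // integers; when it is false they are sampled with read_imagei/read_imageui.
- // normalized_type below picks the float flavor - see PerformReadSelector in
- // Texture2d.cc above.)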
- DataType normalized_type; // can be FLOAT32 or FLOAT16, using with normalized - // = true - - // optional - int2 size = int2(0, 0); - std::vector<uint8_t> data; - - Texture2DDescriptor() = default; - Texture2DDescriptor(const Texture2DDescriptor &) = default; - Texture2DDescriptor &operator=(const Texture2DDescriptor &) = default; - Texture2DDescriptor(Texture2DDescriptor &&desc); - Texture2DDescriptor &operator=(Texture2DDescriptor &&desc); - - absl::Status PerformSelector(const std::string &selector, const std::vector<std::string> &args, - const std::vector<std::string> &template_args, - std::string *result) const override; - - GPUResources GetGPUResources() const override; - absl::Status PerformReadSelector(const std::vector<std::string> &args, std::string *result) const; - - absl::Status CreateGPUObject(CLContext *context, GPUObjectPtr *result) const override; - void Release() override; -}; - -// Texture2D represent formatted GPU data storage. -// Texture2D is moveable but not copyable. -class Texture2D : public GPUObject -{ -public: - Texture2D() {} // just for using Texture2D as a class members - Texture2D(cl_mem texture, int width, int height, cl_channel_type type); - - // Move only - Texture2D(Texture2D &&texture); - Texture2D &operator=(Texture2D &&texture); - Texture2D(const Texture2D &) = delete; - Texture2D &operator=(const Texture2D &) = delete; - - virtual ~Texture2D() { Release(); } - - cl_mem GetMemoryPtr() const { return texture_; } - - // Writes data to a texture. Data should point to a region that - // has exact width * height * sizeof(pixel) bytes. - template <typename T> absl::Status WriteData(CLCommandQueue *queue, const absl::Span<T> data); - - // Reads data from Texture2D into CPU memory. - template <typename T> absl::Status ReadData(CLCommandQueue *queue, std::vector<T> *result) const; - - absl::Status GetGPUResources(const GPUObjectDescriptor *obj_ptr, - GPUResourcesWithValue *resources) const override; - - absl::Status CreateFromTexture2DDescriptor(const Texture2DDescriptor &desc, CLContext *context); - -private: - void Release(); - - cl_mem texture_ = nullptr; - int width_; - int height_; - cl_channel_type channel_type_; -}; - -using Texture2DPtr = std::shared_ptr<Texture2D>; - -// Creates new 4-channel 2D texture with f32 elements -absl::Status CreateTexture2DRGBA32F(int width, int height, CLContext *context, Texture2D *result); - -// Creates new 4-channel 2D texture with f16 elements -absl::Status CreateTexture2DRGBA16F(int width, int height, CLContext *context, Texture2D *result); - -absl::Status CreateTexture2DRGBA(DataType type, int width, int height, CLContext *context, - Texture2D *result); - -absl::Status CreateTexture2DRGBA(DataType type, int width, int height, void *data, - CLContext *context, Texture2D *result); - -template <typename T> -absl::Status Texture2D::WriteData(CLCommandQueue *queue, const absl::Span<T> data) -{ - const int element_size = ChannelTypeToSizeInBytes(channel_type_); - if (sizeof(T) % element_size != 0) - { - return absl::InvalidArgumentError( - "Template type T has not suitable element type for created texture."); - } - if (4 * width_ * height_ * element_size != data.size() * sizeof(T)) - { - return absl::InvalidArgumentError( - "absl::Span<T> data size is different from texture allocated size."); - } - - RETURN_IF_ERROR(queue->EnqueueWriteImage(texture_, int3(width_, height_, 1), data.data())); - - return absl::OkStatus(); -} - -template <typename T> -absl::Status Texture2D::ReadData(CLCommandQueue *queue, std::vector<T> 
*result) const -{ - const int element_size = ChannelTypeToSizeInBytes(channel_type_); - if (sizeof(T) != element_size) - { - return absl::InvalidArgumentError("Pixel format is different."); - } - - const int elements_count = width_ * height_ * 4; - result->resize(elements_count); - - return queue->EnqueueReadImage(texture_, int3(width_, height_, 1), result->data()); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_TEXTURE2D_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/Types.h b/runtime/onert/backend/gpu_cl/open_cl/Types.h deleted file mode 100644 index f3cf33450..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Types.h +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_TYPES_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_TYPES_H__ - -#include <array> -#include <cstddef> -#include <cstdint> - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -// TODO(akulik): make these types Google-style compliant. - -template <typename T> struct alignas(sizeof(T)) Vec4 -{ - union { - struct - { - T x, y, z, w; - }; - std::array<T, 4> data_; - }; - - Vec4() : Vec4(T(0.0f)) {} - - template <typename S> Vec4(S x_, S y_, S z_, S w_) : x(x_), y(y_), z(z_), w(w_) {} - explicit Vec4(T v) : x(v), y(v), z(v), w(v) {} - - template <typename S> explicit Vec4(S v) : x(v), y(v), z(v), w(v) {} - - Vec4(const Vec4 &f) : x(f.x), y(f.y), z(f.z), w(f.w) {} - - template <typename S> Vec4(const Vec4<S> &f) : x(f.x), y(f.y), z(f.z), w(f.w) {} - - Vec4 &operator=(const Vec4 &other) - { - x = other.x; - y = other.y; - z = other.z; - w = other.w; - return *this; - } - - static constexpr int size() { return 4; } - - T &operator[](size_t n) { return data_[n]; } - T operator[](size_t n) const { return data_[n]; } - - bool operator==(const Vec4 &value) const - { - return data_[0] == value[0] && data_[1] == value[1] && data_[2] == value[2] && - data_[3] == value[3]; - } - bool operator!=(const Vec4 &value) const { return !(this->operator==(value)); } -}; - -template <typename T> struct alignas(sizeof(T)) Vec3 -{ - union { - struct - { - T x, y, z; - }; - std::array<T, 3> data_; - }; - - Vec3() : Vec3(T(0.0f)) {} - - template <typename S> constexpr Vec3(S x_, S y_, S z_) : x(x_), y(y_), z(z_) {} - explicit Vec3(T v) : x(v), y(v), z(v) {} - - template <typename S> explicit Vec3(S v) : x(v), y(v), z(v) {} - - Vec3(const Vec3 &f) : x(f.x), y(f.y), z(f.z) {} - - template <typename S> Vec3(const Vec3<S> &f) : x(f.x), y(f.y), z(f.z) {} - - Vec3 &operator=(const Vec3 &other) - { - x = other.x; - y = other.y; - z = other.z; - return *this; - } - - static constexpr int size() { return 3; } - - T &operator[](size_t n) { return data_[n]; } - T operator[](size_t n) const { return data_[n]; } - bool operator==(const Vec3 &value) const - 
{ - return data_[0] == value[0] && data_[1] == value[1] && data_[2] == value[2]; - } - bool operator!=(const Vec3 &value) const { return !(this->operator==(value)); } -}; - -template <typename T> struct alignas(sizeof(T)) Vec2 -{ - union { - struct - { - T x, y; - }; - std::array<T, 2> data_; - }; - - Vec2() : Vec2(T(0.0f)) {} - - template <typename S> Vec2(S x_, S y_) : x(x_), y(y_) {} - explicit Vec2(T v) : x(v), y(v) {} - - template <typename S> explicit Vec2(S v) : x(v), y(v) {} - - Vec2(const Vec2 &f) : x(f.x), y(f.y) {} - - template <typename S> Vec2(const Vec2<S> &f) : x(f.x), y(f.y) {} - - Vec2 &operator=(const Vec2 &other) - { - x = other.x; - y = other.y; - return *this; - } - - bool operator==(const Vec2 &value) const { return data_[0] == value[0] && data_[1] == value[1]; } - - bool operator!=(const Vec2 &value) const { return !(this->operator==(value)); } - - static constexpr int size() { return 2; } - - T &operator[](size_t n) { return data_[n]; } - T operator[](size_t n) const { return data_[n]; } -}; - -using float2 = Vec2<float>; -using byte2 = Vec2<int8_t>; -using ubyte2 = Vec2<uint8_t>; -using short2 = Vec2<int16_t>; -using ushort2 = Vec2<uint16_t>; -using int2 = Vec2<int32_t>; -using uint2 = Vec2<uint32_t>; - -using float3 = Vec3<float>; -using byte3 = Vec3<int8_t>; -using ubyte3 = Vec3<uint8_t>; -using short3 = Vec3<int16_t>; -using ushort3 = Vec3<uint16_t>; -using int3 = Vec3<int32_t>; -using uint3 = Vec3<uint32_t>; - -using float4 = Vec4<float>; -using byte4 = Vec4<int8_t>; -using ubyte4 = Vec4<uint8_t>; -using short4 = Vec4<int16_t>; -using ushort4 = Vec4<uint16_t>; -using int4 = Vec4<int32_t>; -using uint4 = Vec4<uint32_t>; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_TYPES_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/Util.cc b/runtime/onert/backend/gpu_cl/open_cl/Util.cc deleted file mode 100644 index 9f5a8388b..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/Util.cc +++ /dev/null @@ -1,264 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "Util.h" - -#include "absl/strings/str_cat.h" -#include "absl/strings/substitute.h" -#include "Status.h" -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -std::string CLErrorCodeToString(cl_int error_code) -{ - switch (error_code) - { - case CL_SUCCESS: - return "Success"; - case CL_DEVICE_NOT_FOUND: - return "Device not found"; - case CL_DEVICE_NOT_AVAILABLE: - return "Device not available"; - case CL_COMPILER_NOT_AVAILABLE: - return "Compiler not available"; - case CL_MEM_OBJECT_ALLOCATION_FAILURE: - return "Memory object allocation failure"; - case CL_OUT_OF_RESOURCES: - return "Out of resources"; - case CL_OUT_OF_HOST_MEMORY: - return "Out of host memory"; - case CL_PROFILING_INFO_NOT_AVAILABLE: - return "Profiling information not available"; - case CL_MEM_COPY_OVERLAP: - return "Memory copy overlap"; - case CL_IMAGE_FORMAT_MISMATCH: - return "Image format mismatch"; - case CL_IMAGE_FORMAT_NOT_SUPPORTED: - return "Image format not supported"; - case CL_BUILD_PROGRAM_FAILURE: - return "Build program failure"; - case CL_MAP_FAILURE: - return "Mapping failure"; - case CL_MISALIGNED_SUB_BUFFER_OFFSET: - return "Misaligned sub-buffer offset"; - case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: - return "Execution status error for events in wait list"; - case CL_COMPILE_PROGRAM_FAILURE: - return "Compile program failure"; - case CL_LINKER_NOT_AVAILABLE: - return "Linker not available"; - case CL_LINK_PROGRAM_FAILURE: - return "Link program failure"; - case CL_DEVICE_PARTITION_FAILED: - return "Device partition failed"; - case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: - return "Kernel argument information not available"; - - case CL_INVALID_VALUE: - return "Invalid value"; - case CL_INVALID_DEVICE_TYPE: - return "Invalid device type"; - case CL_INVALID_PLATFORM: - return "Invalid platform"; - case CL_INVALID_DEVICE: - return "Invalid device"; - case CL_INVALID_CONTEXT: - return "Invalid context"; - case CL_INVALID_QUEUE_PROPERTIES: - return "Invalid queue properties"; - case CL_INVALID_COMMAND_QUEUE: - return "Invalid command queue"; - case CL_INVALID_HOST_PTR: - return "Invalid host pointer"; - case CL_INVALID_MEM_OBJECT: - return "Invalid memory object"; - case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: - return "Invalid image format descriptor"; - case CL_INVALID_IMAGE_SIZE: - return "Invalid image size"; - case CL_INVALID_SAMPLER: - return "Invalid sampler"; - case CL_INVALID_BINARY: - return "Invalid binary"; - case CL_INVALID_BUILD_OPTIONS: - return "Invalid build options"; - case CL_INVALID_PROGRAM: - return "Invalid program"; - case CL_INVALID_PROGRAM_EXECUTABLE: - return "Invalid program executable"; - case CL_INVALID_KERNEL_NAME: - return "Invalid kernel name"; - case CL_INVALID_KERNEL_DEFINITION: - return "Invalid kernel definition"; - case CL_INVALID_KERNEL: - return "Invalid kernel"; - case CL_INVALID_ARG_INDEX: - return "Invalid argument index"; - case CL_INVALID_ARG_VALUE: - return "Invalid argument value"; - case CL_INVALID_ARG_SIZE: - return "Invalid argument size"; - case CL_INVALID_KERNEL_ARGS: - return "Invalid kernel arguments"; - case CL_INVALID_WORK_DIMENSION: - return "Invalid work dimension"; - case CL_INVALID_WORK_GROUP_SIZE: - return "Invalid work group size"; - case CL_INVALID_WORK_ITEM_SIZE: - return "Invalid work item size"; - case CL_INVALID_GLOBAL_OFFSET: - return "Invalid global offset"; - case CL_INVALID_EVENT_WAIT_LIST: - return "Invalid event wait list"; - case CL_INVALID_EVENT: - return "Invalid event"; - case CL_INVALID_OPERATION: - 
return "Invalid operation";
- case CL_INVALID_GL_OBJECT:
- return "Invalid GL object";
- case CL_INVALID_BUFFER_SIZE:
- return "Invalid buffer size";
- case CL_INVALID_MIP_LEVEL:
- return "Invalid mip-level";
- case CL_INVALID_GLOBAL_WORK_SIZE:
- return "Invalid global work size";
- case CL_INVALID_PROPERTY:
- return "Invalid property";
- case CL_INVALID_IMAGE_DESCRIPTOR:
- return "Invalid image descriptor";
- case CL_INVALID_COMPILER_OPTIONS:
- return "Invalid compiler options";
- case CL_INVALID_LINKER_OPTIONS:
- return "Invalid linker options";
- case CL_INVALID_DEVICE_PARTITION_COUNT:
- return "Invalid device partition count";
- case CL_INVALID_PIPE_SIZE:
- return "Invalid pipe size";
- case CL_INVALID_DEVICE_QUEUE:
- return "Invalid device queue";
- case CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR:
- return "Invalid GL sharegroup reference KHR";
-
- default:
- return "Unknown OpenCL error";
- }
-}
-
-int ChannelTypeToSizeInBytes(cl_channel_type type)
-{
- switch (type)
- {
- case CL_FLOAT:
- return 4;
- default:
- return 0;
- }
-}
-
-absl::Status CreateCLBuffer(cl_context context, int size_in_bytes, bool read_only, void *data,
- cl_mem *result)
-{
- cl_mem_flags flags = read_only ? CL_MEM_READ_ONLY : CL_MEM_READ_WRITE;
- if (data)
- {
- flags |= CL_MEM_COPY_HOST_PTR;
- }
- cl_int error_code;
- *result = clCreateBuffer(context, flags, size_in_bytes, data, &error_code);
- if (!*result)
- {
- return absl::UnknownError(absl::StrCat("Failed to allocate device memory (clCreateBuffer): ",
- CLErrorCodeToString(error_code)));
- }
- return absl::OkStatus();
-}
-
-cl_channel_type DataTypeToChannelType(DataType type, bool normalized)
-{
- switch (type)
- {
- case DataType::FLOAT32:
- return CL_FLOAT;
- case DataType::INT8:
- return normalized ? CL_SNORM_INT8 : CL_SIGNED_INT8;
- case DataType::UINT8:
- return normalized ? CL_UNORM_INT8 : CL_UNSIGNED_INT8;
- case DataType::INT16:
- return normalized ? CL_SNORM_INT16 : CL_SIGNED_INT16;
- case DataType::UINT16:
- return normalized ?
CL_UNORM_INT16 : CL_UNSIGNED_INT16;
- case DataType::INT32:
- return CL_SIGNED_INT32;
- case DataType::UINT32:
- return CL_UNSIGNED_INT32;
- default:
- return CL_FLOAT;
- }
-}
-
-absl::Status CreateRGBAImage2D(cl_context context, int width, int height,
- cl_channel_type channel_type, void *data, cl_mem *result)
-{
- cl_image_desc desc;
- desc.image_type = CL_MEM_OBJECT_IMAGE2D;
- desc.image_width = width;
- desc.image_height = height;
- desc.image_depth = 0;
- desc.image_row_pitch = 0;
- desc.image_slice_pitch = 0;
- desc.num_mip_levels = 0;
- desc.num_samples = 0;
- desc.buffer = nullptr;
-
- cl_image_format format;
- format.image_channel_order = CL_RGBA;
- format.image_channel_data_type = channel_type;
-
- cl_mem_flags flags = CL_MEM_READ_WRITE;
- if (data)
- {
- flags |= CL_MEM_COPY_HOST_PTR;
- }
-
- cl_int error_code;
- *result = CreateImage2DLegacy(context, flags, &format, &desc, data, &error_code);
- if (error_code != CL_SUCCESS)
- {
- return absl::UnknownError(absl::StrCat("Failed to create 2D texture (clCreateImage): ",
- CLErrorCodeToString(error_code)));
- }
- return absl::OkStatus();
-}
-
-std::string GetXStrideCorrected(const std::string &src_x, const std::string &batch_size,
- const std::string &stride_x, const std::string &padding_x)
-{
- // TODO(sorokin) check perf and optimize with floor() if needed
- // int p0 = src_x / batch_size;
- // int b0 = src_x % batch_size;
- // return p0 * stride_x * batch_size + b0 + padding_x;
- return absl::Substitute("((($0) / $1) * $2 * $1 + (($0) % $1) + $3)", src_x, batch_size, stride_x,
- padding_x);
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/Util.h b/runtime/onert/backend/gpu_cl/open_cl/Util.h
deleted file mode 100644
index 996c564f4..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/Util.h
+++ /dev/null
@@ -1,278 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_UTIL_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_UTIL_H__
-
-#include <string>
-
-#include "absl/types/span.h"
-#include "OpenclWrapper.h"
-#include "DataType.h"
-#include "InternalTensor.h"
-#include "Status.h"
-#include "Types.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-// Calculates correct X coordinate when stride != 1 and batch != 1 for layouts
-// with B after W (for example HWBC4) and WB stored in one axis of GPU
-// resources.
-std::string GetXStrideCorrected(const std::string &src_x, const std::string &batch_size,
- const std::string &stride_x, const std::string &padding_x);
-
-// @param n must be non-negative
-// @param divisor must be greater than zero
-template <typename T, typename N> T DivideRoundUp(T n, N divisor)
-{
- const T div = static_cast<T>(divisor);
- const T q = n / div;
- return n % div == 0 ?
q : q + 1; -} - -template <> inline uint3 DivideRoundUp(uint3 n, uint3 divisor) -{ - return uint3(DivideRoundUp(n.x, divisor.x), DivideRoundUp(n.y, divisor.y), - DivideRoundUp(n.z, divisor.z)); -} - -// @param number or its components must be greater than zero -// @param n must be greater than zero -template <typename T, typename N> T AlignByN(T number, N n) { return DivideRoundUp(number, n) * n; } - -std::string CLErrorCodeToString(cl_int error_code); - -int ChannelTypeToSizeInBytes(cl_channel_type type); - -template <DataType S, typename T> -void CopyLinearFLT4(const InternalTensor<Linear, S> &src, absl::Span<T> dst) -{ - const int dst_depth = dst.size(); - for (int d = 0; d < dst_depth; ++d) - { - T val; - for (int i = 0; i < 4; ++i) - { - const int dst_ch = d * 4 + i; - val[i] = dst_ch >= src.shape.v ? 0.0f : src.data[dst_ch]; - } - dst[d] = val; - } -} - -absl::Status CreateCLBuffer(cl_context context, int size_in_bytes, bool read_only, void *data, - cl_mem *result); - -cl_channel_type DataTypeToChannelType(DataType type, bool normalized = false); -absl::Status CreateRGBAImage2D(cl_context context, int width, int height, - cl_channel_type channel_type, void *data, cl_mem *result); - -template <DataType S, typename T> -void RearrangeWeightsToOHWIOGroupI4O4(const InternalTensor<OHWI, S> &weights, int out_group_size, - absl::Span<T> dst) -{ - const int dst_slices = DivideRoundUp(weights.shape.o, 4); - const int src_slices = DivideRoundUp(weights.shape.i, 4); - const int dst_groups = DivideRoundUp(dst_slices, out_group_size); - - int counter = 0; - for (int d = 0; d < dst_groups; ++d) - { - for (int y = 0; y < weights.shape.h; ++y) - { - for (int x = 0; x < weights.shape.w; ++x) - { - for (int s = 0; s < src_slices; ++s) - { - for (int d_group = 0; d_group < out_group_size; ++d_group) - { - for (int j = 0; j < 4; ++j) - { - T filter; - for (int i = 0; i < 4; ++i) - { - const int s_ch = s * 4 + j; - const int d_ch = (d * out_group_size + d_group) * 4 + i; - if (s_ch < weights.shape.i && d_ch < weights.shape.o) - { - const int f_index = weights.shape.LinearIndex({d_ch, y, x, s_ch}); - filter[i] = weights.data[f_index]; - } - else - { - filter[i] = 0.0f; - } - } - dst[counter++] = filter; - } - } - } - } - } - } -} - -template <DataType S, typename T> -void RearrangeWeightsToODHWIOGroupI4O4(const InternalTensor<OHWDI, S> &weights, int out_group_size, - absl::Span<T> dst) -{ - const int dst_slices = DivideRoundUp(weights.shape.o, 4); - const int src_slices = DivideRoundUp(weights.shape.i, 4); - const int dst_groups = DivideRoundUp(dst_slices, out_group_size); - - int counter = 0; - for (int d = 0; d < dst_groups; ++d) - { - for (int z = 0; z < weights.shape.d; ++z) - { - for (int y = 0; y < weights.shape.h; ++y) - { - for (int x = 0; x < weights.shape.w; ++x) - { - for (int s = 0; s < src_slices; ++s) - { - for (int d_group = 0; d_group < out_group_size; ++d_group) - { - for (int j = 0; j < 4; ++j) - { - T filter; - for (int i = 0; i < 4; ++i) - { - const int s_ch = s * 4 + j; - const int d_ch = (d * out_group_size + d_group) * 4 + i; - if (s_ch < weights.shape.i && d_ch < weights.shape.o) - { - const int f_index = weights.shape.LinearIndex({d_ch, y, x, z, s_ch}); - filter[i] = weights.data[f_index]; - } - else - { - filter[i] = 0.0f; - } - } - dst[counter++] = filter; - } - } - } - } - } - } - } -} - -template <DataType S, typename T> -void RearrangeWeightsToI4HWIOOGroupO4(const InternalTensor<OHWI, S> &weights, int out_group_size, - absl::Span<T> dst) -{ - const int dst_slices = 
DivideRoundUp(weights.shape.o, 4); - const int src_slices = DivideRoundUp(weights.shape.i, 4); - const int dst_groups = DivideRoundUp(dst_slices, out_group_size); - - int counter = 0; - for (int j = 0; j < 4; ++j) - { - for (int y = 0; y < weights.shape.h; ++y) - { - for (int x = 0; x < weights.shape.w; ++x) - { - for (int s = 0; s < src_slices; ++s) - { - for (int d = 0; d < dst_groups; ++d) - { - for (int d_group = 0; d_group < out_group_size; ++d_group) - { - T filter; - for (int i = 0; i < 4; ++i) - { - const int s_ch = s * 4 + j; - const int d_ch = (d * out_group_size + d_group) * 4 + i; - if (s_ch < weights.shape.i && d_ch < weights.shape.o) - { - const int f_index = weights.shape.LinearIndex({d_ch, y, x, s_ch}); - filter[i] = weights.data[f_index]; - } - else - { - filter[i] = 0.0f; - } - } - dst[counter++] = filter; - } - } - } - } - } - } -} - -template <DataType S, typename T> -void RearrangeWeightsToI4DHWIOOGroupO4(const InternalTensor<OHWDI, S> &weights, int out_group_size, - absl::Span<T> dst) -{ - const int dst_slices = DivideRoundUp(weights.shape.o, 4); - const int src_slices = DivideRoundUp(weights.shape.i, 4); - const int dst_groups = DivideRoundUp(dst_slices, out_group_size); - - int counter = 0; - for (int j = 0; j < 4; ++j) - { - for (int z = 0; z < weights.shape.d; ++z) - { - for (int y = 0; y < weights.shape.h; ++y) - { - for (int x = 0; x < weights.shape.w; ++x) - { - for (int s = 0; s < src_slices; ++s) - { - for (int d = 0; d < dst_groups; ++d) - { - for (int d_group = 0; d_group < out_group_size; ++d_group) - { - T filter; - for (int i = 0; i < 4; ++i) - { - const int s_ch = s * 4 + j; - const int d_ch = (d * out_group_size + d_group) * 4 + i; - if (s_ch < weights.shape.i && d_ch < weights.shape.o) - { - const int f_index = weights.shape.LinearIndex({d_ch, y, x, z, s_ch}); - filter[i] = weights.data[f_index]; - } - else - { - filter[i] = 0.0f; - } - } - dst[counter++] = filter; - } - } - } - } - } - } - } -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_UTIL_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/WinogradUtil.cc b/runtime/onert/backend/gpu_cl/open_cl/WinogradUtil.cc deleted file mode 100644 index 5f1103ad9..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/WinogradUtil.cc +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */
-
-#include "open_cl/WinogradUtil.h"
-
-#include <cmath>
-#include <vector>
-
-#include "open_cl/DataType.h"
-#include "open_cl/Shape.h"
-#include "open_cl/Tensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace
-{
-// Matrices for Winograd transformations were computed with the method described
-// here https://openreview.net/pdf?id=H1ZaRZVKg
-std::vector<float> GetTransposedMatrixForWinograd(int width, int height)
-{
- const float kDelta = std::sqrt(2.0f) / 2.0f;
- std::vector<float> px(width);
-
- px[0] = 0.0f;
- const int points_count = (width - 1) / 2;
- for (int i = 0; i < points_count; ++i)
- {
- px[i * 2 + 1] = kDelta * (i + 1.0f);
- px[i * 2 + 2] = -kDelta * (i + 1.0f);
- }
- px[width - 1] = 1.0f;
-
- std::vector<float> py(width, 1.0f);
- py[width - 1] = 0.0f;
-
- std::vector<float> result(height * width);
- for (int y = 0; y < width; ++y)
- {
- for (int x = 0; x < height; ++x)
- {
- result[x * width + y] = std::pow(px[y], 1.0f * x) * std::pow(py[y], (height - 1.0f) - x);
- }
- }
- return result;
-}
-
-std::vector<float> GetInversedMatrixForWinograd(int rank)
-{
- auto matrix = GetTransposedMatrixForWinograd(rank, rank);
- std::vector<float> inverted(rank * rank, 0.0f);
- for (int i = 0; i < rank; ++i)
- {
- inverted[i * rank + i] = 1.0f;
- }
-
- for (int i = 1; i < rank - 1; ++i)
- {
- float inv_t = 1.0f / matrix[i * rank + i];
- for (int x = i; x < rank; ++x)
- {
- matrix[i * rank + x] *= inv_t;
- }
- for (int x = 0; x < rank; ++x)
- {
- inverted[i * rank + x] *= inv_t;
- }
-
- for (int y = 0; y < rank; ++y)
- {
- if (y == i)
- continue;
- float t = matrix[y * rank + i];
- for (int x = i; x < rank; ++x)
- {
- matrix[y * rank + x] -= t * matrix[i * rank + x];
- }
- for (int x = 0; x < rank; ++x)
- {
- inverted[y * rank + x] -= t * inverted[i * rank + x];
- }
- }
- }
-
- return inverted;
-}
-
-std::vector<float> Multiply(const std::vector<float> &a_mat, const std::vector<float> &b_mat, int m,
- int n, int k)
-{
- std::vector<float> result(m * k);
- for (int y = 0; y < m; ++y)
- {
- for (int x = 0; x < k; ++x)
- {
- float sum = 0.0f;
- for (int i = 0; i < n; ++i)
- {
- sum += a_mat[y * n + i] * b_mat[i * k + x];
- }
- result[y * k + x] = sum;
- }
- }
- return result;
-}
-} // namespace
-
-std::vector<float> AtMatrixForWinograd4x4To6x6() { return GetTransposedMatrixForWinograd(6, 4); }
-
-std::vector<float> BtMatrixForWinograd4x4To6x6() { return GetInversedMatrixForWinograd(6); }
-
-void RearrangeWeightsToWinograd4x4To6x6Weights(
- const gpu_cl::InternalTensor<gpu_cl::OHWI, gpu_cl::DataType::FLOAT32> &src_weights,
- gpu_cl::InternalTensor<gpu_cl::OHWI, gpu_cl::DataType::FLOAT32> *dst_weights)
-{
- gpu_cl::OHWI dst_shape;
- dst_shape.o = src_weights.shape.o;
- dst_shape.h = 6;
- dst_shape.w = 6;
- dst_shape.i = src_weights.shape.i;
- dst_weights->shape = dst_shape;
- dst_weights->data.resize(dst_shape.DimensionsProduct());
-
- auto gt_mat = GetTransposedMatrixForWinograd(6, 3);
- std::vector<float> g_mat(gt_mat.size());
- for (int y = 0; y < 3; ++y)
- {
- for (int x = 0; x < 6; ++x)
- {
- g_mat[x * 3 + y] = gt_mat[y * 6 + x];
- }
- }
-
- for (int d = 0; d < src_weights.shape.o; ++d)
- {
- for (int s = 0; s < src_weights.shape.i; ++s)
- {
- std::vector<float> in_vals(9);
- for (int y = 0; y < 3; ++y)
- {
- for (int x = 0; x < 3; ++x)
- {
- const int f_index = src_weights.shape.LinearIndex({d, y, x, s});
- in_vals[y * 3 + x] = src_weights.data[f_index];
- }
- }
-
- auto temp_vals = Multiply(g_mat, in_vals, 6, 3, 3);
- auto out_vals = Multiply(temp_vals, gt_mat, 6, 3, 6);
-
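// [Editor's annotation, not part of the upstream diff] The two Multiply calls
// above implement the Winograd F(4x4, 3x3) weight transform W' = G * W * G^T:
// gt_mat holds G^T (3x6), g_mat holds G (6x3), and in_vals is one 3x3 filter
// slice W, so temp_vals = G * W is 6x3 and out_vals = (G * W) * G^T is the 6x6
// tile that the loop below scatters into dst_weights.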
for (int y = 0; y < 6; ++y)
- {
- for (int x = 0; x < 6; ++x)
- {
- const int f_index = dst_shape.LinearIndex({d, y, x, s});
- dst_weights->data[f_index] = out_vals[y * 6 + x];
- }
- }
- }
- }
-}
-
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/WinogradUtil.h b/runtime/onert/backend/gpu_cl/open_cl/WinogradUtil.h
deleted file mode 100644
index 32e21760d..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/WinogradUtil.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_WINOGRAD_UTIL_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_WINOGRAD_UTIL_H__
-
-#include <vector>
-
-#include "open_cl/DataType.h"
-#include "open_cl/Shape.h"
-#include "open_cl/InternalTensor.h"
-
-namespace onert
-{
-namespace backend
-{
-
-// Matrices for Winograd transformations obtained with the method described here
-// https://openreview.net/pdf?id=H1ZaRZVKg
-
-// returns the transposed A matrix (6 x 4) as an array (24 values) for Winograd4x4To6x6
-std::vector<float> AtMatrixForWinograd4x4To6x6();
-
-// returns the transposed B matrix (6 x 6) as an array (36 values) for Winograd4x4To6x6
-std::vector<float> BtMatrixForWinograd4x4To6x6();
-
-void RearrangeWeightsToWinograd4x4To6x6Weights(
- const gpu_cl::InternalTensor<gpu_cl::OHWI, gpu_cl::DataType::FLOAT32> &src_weights,
- gpu_cl::InternalTensor<gpu_cl::OHWI, gpu_cl::DataType::FLOAT32> *dst_weights);
-
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_WINOGRAD_UTIL_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/WorkgroupSelection.cc b/runtime/onert/backend/gpu_cl/open_cl/WorkgroupSelection.cc
deleted file mode 100644
index 847c2a2aa..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/WorkgroupSelection.cc
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "WorkgroupSelection.h"
-
-#include <math.h>
-
-#include <set>
-#include <vector>
-
-#include "Util.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-namespace
-{
-
-template <typename T>
-void AddCornerCases(const T &grid, int max_work_group_total_size, const T &max_work_group_sizes,
- WorkGroupSizeAlignment x_alignment, WorkGroupSizeAlignment y_alignment,
- WorkGroupSizeAlignment z_alignment, std::vector<T> *work_groups)
-{
- for (int x = 1; x <= 4; ++x)
- {
- for (int y = 1; y <= 4; ++y)
- {
- for (int z = 1; z <= 4; ++z)
- {
- u_int32_t wg_x = DivideRoundUp(grid.x, x);
- u_int32_t wg_y = DivideRoundUp(grid.y, y);
- u_int32_t wg_z = DivideRoundUp(grid.z, z);
- if (wg_x > static_cast<u_int32_t>(max_work_group_sizes.x) ||
- wg_y > static_cast<u_int32_t>(max_work_group_sizes.y) ||
- wg_z > static_cast<u_int32_t>(max_work_group_sizes.z) ||
- wg_x * wg_y * wg_z > static_cast<u_int32_t>(max_work_group_total_size))
- {
- continue;
- }
- if (x_alignment == WorkGroupSizeAlignment::PRECISE && grid.x % wg_x != 0)
- {
- continue;
- }
- if (y_alignment == WorkGroupSizeAlignment::PRECISE && grid.y % wg_y != 0)
- {
- continue;
- }
- if (z_alignment == WorkGroupSizeAlignment::PRECISE && grid.z % wg_z != 0)
- {
- continue;
- }
- work_groups->push_back({wg_x, wg_y, wg_z});
- }
- }
- }
-
- // this will add at least {1, 1, 1} always.
- for (u_int32_t x = 1; x <= 4; ++x)
- {
- for (u_int32_t y = 1; y <= 4; ++y)
- {
- for (u_int32_t z = 1; z <= 4; ++z)
- {
- if (x > static_cast<u_int32_t>(max_work_group_sizes.x) ||
- y > static_cast<u_int32_t>(max_work_group_sizes.y) ||
- z > static_cast<u_int32_t>(max_work_group_sizes.z) ||
- x * y * z > static_cast<u_int32_t>(max_work_group_total_size))
- {
- continue;
- }
- if (x_alignment == WorkGroupSizeAlignment::PRECISE && grid.x % x != 0)
- {
- continue;
- }
- if (y_alignment == WorkGroupSizeAlignment::PRECISE && grid.y % y != 0)
- {
- continue;
- }
- if (z_alignment == WorkGroupSizeAlignment::PRECISE && grid.z % z != 0)
- {
- continue;
- }
- work_groups->push_back({x, y, z});
- }
- }
- }
-}
-
-std::vector<int> GetDivisors(int number)
-{
- const int max_divisor = static_cast<int>(sqrt(number));
- std::vector<int> divisors;
- // we don't know the number of divisors in advance, so this is just a heuristic.
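// [Editor's annotation, not part of the upstream diff] A worked example of the
// loop below, assuming number = 12: max_divisor = 3, and the pairs (i, 12 / i)
// are appended in visit order, yielding {1, 12, 2, 6, 3, 4}.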
- divisors.reserve(max_divisor / 3 + 1);
- for (int i = 1; i <= max_divisor; ++i)
- {
- const int d = number / i;
- if (i * d == number)
- {
- divisors.push_back(i);
- if (d != i)
- {
- divisors.push_back(d);
- }
- }
- }
- return divisors;
-}
-
-std::vector<int> GetDivisorsForRange(int number, int range)
-{
- const int last_number = number + range;
- const int max_divisor = static_cast<int>(sqrt(last_number));
- std::set<int> divisors;
- for (int i = 1; i <= max_divisor; ++i)
- {
- const int remainder = number % i;
- // iterate through the numbers in our range that are divisible by i
- const int first_number = number + (i - remainder) % i;
- if (first_number <= last_number)
- {
- divisors.insert(i);
- }
- for (int j = first_number; j <= last_number; j += i)
- {
- const int d = j / i;
- if (d != i)
- {
- divisors.insert(d);
- }
- }
- }
- return std::vector<int>(divisors.begin(), divisors.end());
-}
-
-} // namespace
-
-std::vector<int> GetPossibleSizes(int number, WorkGroupSizeAlignment z_alignment)
-{
- if (z_alignment == WorkGroupSizeAlignment::PRECISE)
- {
- // use only potential sizes that cover the grid precisely:
- // work group size * k (k is an integer) == grid_size
- return GetDivisors(number);
- }
- else
- {
- // when choosing a work group size we may also use sizes whose multiple
- // slightly overshoots the grid, so this heuristic looks for potential
- // sizes that satisfy both of these:
- // work group size * k (k is an integer) <= grid_size + 5
- // work group size * k (k is an integer) >= grid_size
- return GetDivisorsForRange(number, 5);
- }
-}
-
-template <typename T>
-std::vector<T>
-GenerateWorkGroupSizes(const T &grid, int min_work_group_total_size, int max_work_group_total_size,
- const T &max_work_group_sizes, WorkGroupSizeAlignment x_alignment,
- WorkGroupSizeAlignment y_alignment, WorkGroupSizeAlignment z_alignment)
-{
- std::vector<T> work_groups;
- work_groups.reserve(64);
-
- std::vector<int> sizes_x = GetPossibleSizes(grid.x, x_alignment);
- std::vector<int> sizes_y = GetPossibleSizes(grid.y, y_alignment);
- std::vector<int> sizes_z = GetPossibleSizes(grid.z, z_alignment);
-
- for (auto x : sizes_x)
- {
- if (static_cast<int>(x) > static_cast<int>(max_work_group_sizes.x))
- continue;
- for (auto y : sizes_y)
- {
- if (static_cast<int>(y) > static_cast<int>(max_work_group_sizes.y))
- continue;
- for (auto z : sizes_z)
- {
- if (static_cast<int>(z) > static_cast<int>(max_work_group_sizes.z))
- continue;
- const int work_group_size = x * y * z;
- if (work_group_size < min_work_group_total_size ||
- work_group_size > max_work_group_total_size)
- continue;
- work_groups.push_back({x, y, z});
- }
- }
- }
-
- return work_groups;
-}
-
-// Specializations of GenerateWorkGroupSizes for int3 and uint3
-
-template std::vector<int3> GenerateWorkGroupSizes(const int3 &grid, int min_work_group_total_size,
- int max_work_group_total_size,
- const int3 &max_work_group_sizes,
- WorkGroupSizeAlignment x_alignment,
- WorkGroupSizeAlignment y_alignment,
- WorkGroupSizeAlignment z_alignment);
-
-template std::vector<uint3> GenerateWorkGroupSizes(const uint3 &grid, int min_work_group_total_size,
- int max_work_group_total_size,
- const uint3 &max_work_group_sizes,
- WorkGroupSizeAlignment x_alignment,
- WorkGroupSizeAlignment y_alignment,
- WorkGroupSizeAlignment z_alignment);
-
-template <typename T>
-void GenerateWorkGroupSizesAlignedToGrid(const T &grid, const T &max_work_group_size,
- const int max_work_group_invocations,
- std::vector<T>
*work_groups)
-{
- auto alignment = WorkGroupSizeAlignment::PRECISE;
- *work_groups =
- GenerateWorkGroupSizes<T>(grid, /*min_work_group_total_size = */ 32, max_work_group_invocations,
- max_work_group_size, alignment, alignment, alignment);
- // If the grid is too small, the call above cannot generate any work groups.
- if (work_groups->empty())
- {
- AddCornerCases(grid, max_work_group_invocations, max_work_group_size, alignment, alignment,
- alignment, work_groups);
- }
-}
-
-// Specializations of GenerateWorkGroupSizesAlignedToGrid for int3 and uint3
-
-template void GenerateWorkGroupSizesAlignedToGrid(const int3 &grid, const int3 &max_work_group_size,
- const int max_work_group_invocations,
- std::vector<int3> *work_groups);
-
-template void GenerateWorkGroupSizesAlignedToGrid(const uint3 &grid,
- const uint3 &max_work_group_size,
- const int max_work_group_invocations,
- std::vector<uint3> *work_groups);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/WorkgroupSelection.h b/runtime/onert/backend/gpu_cl/open_cl/WorkgroupSelection.h
deleted file mode 100644
index b0702ac7c..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/WorkgroupSelection.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_WORK_GROUP_SELECTION_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_WORK_GROUP_SELECTION_H__
-
-#include <vector>
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// PRECISE assumes that WorkGroupSize * k = GridSize;
-// NO_ALIGNMENT imposes no restrictions.
-// We need PRECISE when the kernel has no boundary check;
-// if it does have the check, either PRECISE or NO_ALIGNMENT can be used.
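[Editor's annotation, not part of the upstream diff] A minimal usage sketch of the declarations below; the 13x8x4 grid and the 256x256x64 / 256-invocation device limits are illustrative assumptions, not values taken from this backend:

  // Collect work group sizes whose dimensions divide a 13x8x4 grid exactly
  // (PRECISE alignment is hardwired inside the helper).
  std::vector<int3> work_groups;
  GenerateWorkGroupSizesAlignedToGrid(int3(13, 8, 4), int3(256, 256, 64),
                                      256, &work_groups);
  // The helper keeps candidates whose total size lies in [32, 256]; if nothing
  // survives (a very small grid), AddCornerCases falls back to near-minimal
  // sizes and always admits at least {1, 1, 1}.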
-enum class WorkGroupSizeAlignment -{ - PRECISE, - NO_ALIGNMENT -}; - -std::vector<int> GetPossibleSizes(int number, WorkGroupSizeAlignment z_alignment); - -// Specializations exist for int3 and uint3 in the .cc file - -template <typename T> -std::vector<T> -GenerateWorkGroupSizes(const T &grid, int min_work_group_total_size, int max_work_group_total_size, - const T &max_work_group_sizes, WorkGroupSizeAlignment x_alignment, - WorkGroupSizeAlignment y_alignment, WorkGroupSizeAlignment z_alignment); - -template <typename T> -void GenerateWorkGroupSizesAlignedToGrid(const T &grid, const T &max_work_group_size, - const int max_work_group_invocations, - std::vector<T> *work_groups); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_WORK_GROUP_SELECTION_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Add.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Add.cc deleted file mode 100644 index 09100fe1f..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Add.cc +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Add.h" - -#include <cstring> -#include <string> - -#include "absl/strings/str_cat.h" -#include "Util.h" -#include "open_cl/Util.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -GPUOperation CreateAdd(const OperationDef &definition, const std::vector<int> &channels, - int dst_channels) -{ - GPUOperation add(definition); - int dst_depth = DivideRoundUp(dst_channels, 4); - int src0_depth = DivideRoundUp(channels[0], 4); - add.elementwise_ = true; - add.linkable_ = dst_depth == src0_depth; - if (src0_depth < dst_depth) - { - add.check_src_channels_size_ = true; - } - for (uint32_t i = 1; i < definition.src_tensors.size(); ++i) - { - const std::string tensor_name = absl::StrCat("src_data_", i); - auto src_desc = definition.src_tensors[i]; - if (definition.IsBatchSupported()) - { - src_desc.SetStateVar("BatchedWidth", "true"); - } - add.AddSrcTensor(tensor_name, src_desc); - add.code_ += "if (S_COORD < args." + tensor_name + ".Slices()) {\n"; - add.code_ += " in_out_value += args." + tensor_name + ".Read(X_COORD, Y_COORD, S_COORD);\n"; - add.code_ += "}\n"; - } - return add; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Add.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Add.h deleted file mode 100644 index 2335a901c..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Add.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_ADD_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_ADD_H__
-
-#include <string>
-#include <vector>
-
-#include "GpuOperation.h"
-#include "open_cl/Operations.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// The Add operation supports input tensors with unequal channel counts (this makes
-// it possible to remove a Padding operation that pads the channels dimension with zeroes)
-GPUOperation CreateAdd(const OperationDef &definition, const std::vector<int> &channels,
- int dst_channels);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_ADD_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvBuffer1x1.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvBuffer1x1.cc
deleted file mode 100644
index 1b9014fdf..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvBuffer1x1.cc
+++ /dev/null
@@ -1,480 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -#include "open_cl/kernels/ConvBuffer1x1.h" - -#include <array> -#include <string> -#include <utility> - -#include "open_cl/ClDevice.h" -#include "open_cl/kernels/Util.h" -#include "open_cl/kernels/WorkGroupPicking.h" -#include "open_cl/Precision.h" -#include "open_cl/TensorType.h" -#include "open_cl/Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -// element_size must be 1, 2 or 4 -// 1 - is FLT4 -// 2 - is FLT8 -// 4 - is FLT16 -// This function generates code for arithmetic part of convolution -std::string GetComputationPart(const int3 &block_size, int element_size, - CalculationsPrecision precision) -{ - const std::string hexes[16] = {"0", "1", "2", "3", "4", "5", "6", "7", - "8", "9", "a", "b", "c", "d", "e", "f"}; - std::string c; - for (int z = 0; z < block_size.z; ++z) - { - const std::string z_s = std::to_string(z); - c += " FLT16 W" + z_s + " = weights_cache[" + z_s + "];\n"; - for (int y = 0; y < block_size.y; ++y) - { - for (int x = 0; x < block_size.x; ++x) - { - std::string s_index = std::to_string(y * block_size.x + x); - for (int e = 0; e < element_size; ++e) - { - std::string r_index = z_s + std::to_string(y) + std::to_string(x * element_size + e); - const std::string f0 = "W" + z_s + ".s0123"; - const std::string f1 = "W" + z_s + ".s4567"; - const std::string f2 = "W" + z_s + ".s89ab"; - const std::string f3 = "W" + z_s + ".scdef"; - switch (precision) - { - case CalculationsPrecision::F32: - case CalculationsPrecision::F16: - c += " r" + r_index + " += " + f0 + " * s" + s_index + ".s" + hexes[e * 4 + 0] + - ";\n"; - c += " r" + r_index + " += " + f1 + " * s" + s_index + ".s" + hexes[e * 4 + 1] + - ";\n"; - c += " r" + r_index + " += " + f2 + " * s" + s_index + ".s" + hexes[e * 4 + 2] + - ";\n"; - c += " r" + r_index + " += " + f3 + " * s" + s_index + ".s" + hexes[e * 4 + 3] + - ";\n"; - break; - case CalculationsPrecision::F32_F16: - c += " r" + r_index + " += convert_float4(" + f0 + " * s" + s_index + ".s" + - hexes[e * 4 + 0] + " + " + f1 + " * s" + s_index + ".s" + hexes[e * 4 + 1] + - " + " + f2 + " * s" + s_index + ".s" + hexes[e * 4 + 2] + " + " + f3 + " * s" + - s_index + ".s" + hexes[e * 4 + 3] + ");\n"; - break; - } - } - } - } - } - return c; -} - -ConvBuffer1x1::ConvParams GetBestParams(const DeviceInfo &device_info, - const OperationDef &definition, const BHWC &shape, int, - int dst_depth) -{ - ConvBuffer1x1::ConvParams conv_params; - conv_params.element_size = 4; - conv_params.block_size = int3(1, 1, 1); - if (!device_info.IsMali()) - { - return conv_params; - } - bool can_use_flt8 = - (shape.w * shape.b) % 2 == 0 && definition.precision != CalculationsPrecision::F32; - bool is_midgard = device_info.IsMali() && device_info.mali_info.IsMidgard(); - if (is_midgard) - { - if (can_use_flt8) - { - conv_params.element_size = 8; - } - if (definition.precision == CalculationsPrecision::F16 || !can_use_flt8) - { - conv_params.block_size.x = 2; - } - return conv_params; - } - - int task_size = shape.w * shape.b * shape.h * dst_depth; - int block_size = GetRecommendedBlockSizeForConv(device_info, definition.precision, task_size); - - if (!can_use_flt8 && block_size > 4) - { - block_size = 4; - } - - if (can_use_flt8 && block_size >= 2) - { - conv_params.element_size = 8; - block_size /= 2; - } - if (block_size == 4) - { - conv_params.block_size.x = 2; - if (definition.precision == CalculationsPrecision::F32 && dst_depth < 32) - { - conv_params.block_size.y = 2; - } - else - { - conv_params.block_size.z = 2; - } - } - 
else if (block_size == 2) - { - if (dst_depth >= 32) - { - conv_params.block_size.z = 2; - } - else - { - conv_params.block_size.x = 2; - } - } - - return conv_params; -} - -ConvBuffer1x1::ConvParams GetBestParams(const DeviceInfo &device_info, - const OperationDef &definition, int, int) -{ - ConvBuffer1x1::ConvParams conv_params; - conv_params.element_size = 4; - conv_params.block_size = int3(1, 1, 1); - if (device_info.IsMali() && definition.precision == CalculationsPrecision::F16 && - device_info.compute_units_count <= 4) - { - conv_params.block_size.x *= 2; - } - return conv_params; -} - -} // namespace - -ConvBuffer1x1::ConvBuffer1x1(const OperationDef &definition, const ConvParams &conv_params) - : GPUOperation(definition), conv_params_(conv_params) -{ - code_ = GenerateConvBuffer1x1(definition_, conv_params_, &args_); - work_group_size_ = int3(2, 4, 1); -} - -ConvBuffer1x1::ConvBuffer1x1(ConvBuffer1x1 &&operation) - : GPUOperation(std::move(operation)), conv_params_(std::move(operation.conv_params_)) -{ -} - -ConvBuffer1x1 &ConvBuffer1x1::operator=(ConvBuffer1x1 &&operation) -{ - if (this != &operation) - { - std::swap(conv_params_, operation.conv_params_); - GPUOperation::operator=(std::move(operation)); - } - return *this; -} - -std::string ConvBuffer1x1::GenerateConvBuffer1x1(const OperationDef &op_def, - const ConvBuffer1x1::ConvParams &conv_params, - Arguments *) -{ - auto src_desc = op_def.src_tensors[0]; - if (op_def.IsBatchSupported()) - { - src_desc.SetStateVar("BatchedWidth", "true"); - } - if (conv_params_.element_size == 8) - { - src_desc.SetStateVar("ElementsX2", "true"); - } - else if (conv_params_.element_size == 16) - { - src_desc.SetStateVar("ElementsX4", "true"); - } - AddSrcTensor("src_tensor", src_desc); - if (op_def.src_tensors.size() == 2) - { - // dynamic weights - BufferDescriptor desc; - desc.element_type = op_def.src_tensors[1].data_type; - desc.element_size = 16; - desc.memory_type = MemoryType::GLOBAL; - AddSrcBuffer("weights", desc); - } - - auto dst_desc = op_def.dst_tensors[0]; - if (op_def.IsBatchSupported()) - { - dst_desc.SetStateVar("BatchedWidth", "true"); - } - AddDstTensor("dst_tensor", dst_desc); - - std::string c = GetCommonDefines(op_def.precision); - switch (op_def.precision) - { - case CalculationsPrecision::F32: - c += "#define FLT8 float8\n"; - c += "#define FLT16 float16\n"; - break; - case CalculationsPrecision::F32_F16: - case CalculationsPrecision::F16: - c += "#define FLT8 half8\n"; - c += "#define FLT16 half16\n"; - break; - } - - const int3 block_size = conv_params.block_size; - const int element_size = conv_params.element_size / 4; - - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - c += " int X = get_global_id(0) * " + std::to_string(block_size.x * element_size) + ";\n"; - c += " int X_SRC = get_global_id(0) * " + std::to_string(block_size.x) + ";\n"; - c += " int Y = get_global_id(1) * " + std::to_string(block_size.y) + ";\n"; - c += " int Z = get_global_id(2) * " + std::to_string(block_size.z) + ";\n"; - c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || " - "Z >= args.dst_tensor.Slices()) return;\n"; - if (conv_params.different_weights_for_height) - { - c += " __global FLT16* weights_cache = args.weights.GetPtr() + (Z * " - "args.src_tensor.Height() + " - "Y * " + - std::to_string(block_size.z) + - ") * " - "args.src_tensor.Slices();\n"; - } - else - { - c += " __global FLT16* weights_cache = args.weights.GetPtr() + Z * " - "args.src_tensor.Slices();\n"; - } - for (int z = 0; z < 
block_size.z; ++z) - { - const std::string z_s = std::to_string(z); - c += " ACCUM_FLT4 bias_val_" + z_s + " = TO_ACCUM_TYPE(args.biases.Read(Z + " + z_s + "));\n"; - for (int y = 0; y < block_size.y; ++y) - { - for (int x = 0; x < block_size.x * element_size; ++x) - { - c += " ACCUM_FLT4 r" + z_s + std::to_string(y) + std::to_string(x) + " = bias_val_" + z_s + - ";\n"; - } - } - } - for (int x = 0; x < block_size.x; ++x) - { - std::string x_s = std::to_string(x); - c += " int xc" + x_s + " = min(X_SRC + " + std::to_string(x) + - ", args.src_tensor.Width() - 1);\n"; - } - for (int y = 0; y < block_size.y; ++y) - { - std::string y_s = std::to_string(y); - c += " int yc" + y_s + " = min(Y + " + y_s + ", args.src_tensor.Height() - 1);\n"; - } - for (int y = 0; y < block_size.y; ++y) - { - std::string y_s = std::to_string(y); - for (int x = 0; x < block_size.x; ++x) - { - std::string x_s = std::to_string(x); - std::string i_s = std::to_string(y * block_size.x + x); - c += " int src_addr_" + i_s + " = (yc" + y_s + ") * args.src_tensor.Width() + (xc" + x_s + - ");\n"; - } - } - c += " for (int s = 0; s < args.src_tensor.Slices(); ++s) {\n"; - for (int y = 0; y < block_size.y; ++y) - { - std::string y_s = std::to_string(y); - for (int x = 0; x < block_size.x; ++x) - { - std::string x_s = std::to_string(x); - std::string i_s = std::to_string(y * block_size.x + x); - c += " FLT" + std::to_string(element_size * 4) + " s" + i_s + - " = args.src_tensor.Read(src_addr_" + i_s + ");\n"; - } - } - c += GetComputationPart(block_size, element_size, op_def.precision); - for (int i = 0; i < block_size.x * block_size.y; ++i) - { - std::string i_s = std::to_string(i); - c += " src_addr_" + i_s + " += args.src_tensor.SliceStride();\n"; - } - c += " weights_cache += " + std::to_string(block_size.z) + ";\n"; - c += " }\n"; // SRC_SLICES - - for (int z = 0; z < block_size.z; ++z) - { - const std::string z_s = std::to_string(z); - if (z != 0) - { - c += " if (Z + " + z_s + " >= args.dst_tensor.Slices()) return;\n"; - } - for (int y = 0; y < block_size.y; ++y) - { - const std::string y_s = std::to_string(y); - for (int x = 0; x < block_size.x * element_size; ++x) - { - const std::string x_s = std::to_string(x); - c += " if (X + " + x_s + " < args.dst_tensor.Width() && Y + " + y_s + - " < args.dst_tensor.Height()) {\n"; - c += " FLT4 res = TO_FLT4(r" + z_s + y_s + x_s + ");\n"; - c += " args.dst_tensor.Write(res, X + " + x_s + ", Y + " + y_s + ", Z + " + z_s + ");\n"; - c += " }\n"; - } - } - } - c += "}\n"; - return c; -} - -int3 ConvBuffer1x1::GetGridSize() const -{ - const int dst_width_elements = - DivideRoundUp(dst_[0]->Width() * dst_[0]->Batch(), (conv_params_.element_size / 4)); - const int grid_x = DivideRoundUp(dst_width_elements, conv_params_.block_size.x); - const int grid_y = DivideRoundUp(dst_[0]->Height(), conv_params_.block_size.y); - const int grid_z = DivideRoundUp(dst_[0]->Slices(), conv_params_.block_size.z); - return int3(grid_x, grid_y, grid_z); -} - -void ConvBuffer1x1::GetPossibleKernelWorkGroups(TuningType tuning_type, - const DeviceInfo &device_info, - const KernelInfo &kernel_info, - std::vector<int3> *work_groups) const -{ - GetPossibleWorkGroupsConv(tuning_type, device_info, kernel_info, grid_size_, work_groups); -} - -bool IsConvBuffer1x1Supported(const OperationDef &definition, const Convolution2DAttributes &attr) -{ - auto src_storage_type = definition.src_tensors[0].storage_type; - return src_storage_type == TensorStorageType::BUFFER && attr.weights.shape.w == 1 && - 
attr.weights.shape.h == 1 && attr.dilations.w == 1 && attr.dilations.h == 1 && - attr.strides.w == 1 && attr.strides.h == 1 && attr.padding.prepended.w == 0 && - attr.padding.prepended.h == 0 && attr.padding.appended.w == 0 && - attr.padding.appended.h == 0; -} - -bool IsConvBuffer1x1Supported(const OperationDef &definition, const BHWC &weights_shape, - const Convolution2DAttributes &attr) -{ - auto src_storage_type = definition.src_tensors[0].storage_type; - return src_storage_type == TensorStorageType::BUFFER && weights_shape.w == 1 && - weights_shape.h == 1 && attr.dilations.w == 1 && attr.dilations.h == 1 && - attr.strides.w == 1 && attr.strides.h == 1 && attr.padding.prepended.w == 0 && - attr.padding.prepended.h == 0 && attr.padding.appended.w == 0 && - attr.padding.appended.h == 0; -} - -ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution2DAttributes &attr, const BHWC *shape) -{ - const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); - const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); - ConvBuffer1x1::ConvParams conv_params; - if (shape) - { - conv_params = GetBestParams(device_info, definition, *shape, src_depth, dst_depth); - } - else - { - conv_params = GetBestParams(device_info, definition, src_depth, dst_depth); - } - ConvBuffer1x1 result(definition, conv_params); - result.UploadData(attr.weights, attr.bias); - return result; -} - -ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo &device_info, const OperationDef &definition, - const FullyConnectedAttributes &attr, const BHWC *shape) -{ - const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); - const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); - ConvBuffer1x1::ConvParams conv_params; - if (shape) - { - conv_params = GetBestParams(device_info, definition, *shape, src_depth, dst_depth); - } - else - { - conv_params = GetBestParams(device_info, definition, src_depth, dst_depth); - } - conv_params.block_size.x *= conv_params.block_size.y; - conv_params.block_size.y = 1; - ConvBuffer1x1 result(definition, conv_params); - result.UploadData(attr.weights, attr.bias); - return result; -} - -ConvBuffer1x1 CreateConvBuffer1x1Wino4x4To6x6(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC *shape) -{ - const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); - const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); - ConvBuffer1x1::ConvParams conv_params; - if (shape) - { - conv_params = GetBestParams(device_info, definition, *shape, src_depth, dst_depth); - } - else - { - conv_params = GetBestParams(device_info, definition, src_depth, dst_depth); - } - conv_params.block_size.x *= conv_params.block_size.y; - conv_params.block_size.y = 1; - conv_params.different_weights_for_height = true; - ConvBuffer1x1 result(definition, conv_params); - result.UploadDataForWinograd4x4To6x6(attr.weights); - return result; -} - -ConvBuffer1x1 CreateConvBuffer1x1DynamicWeights(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC &weights_shape, const BHWC *dst_shape) -{ - const int dst_depth = DivideRoundUp(weights_shape.b, 4); - const int src_depth = DivideRoundUp(weights_shape.c, 4); - ConvBuffer1x1::ConvParams conv_params; - if (dst_shape) - { - conv_params = GetBestParams(device_info, definition, *dst_shape, src_depth, dst_depth); - } - else - { - conv_params = GetBestParams(device_info, definition, src_depth, 
dst_depth);
- }
- ConvBuffer1x1 result(definition, conv_params);
- result.UploadBiases(attr.bias);
- return result;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvBuffer1x1.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvBuffer1x1.h
deleted file mode 100644
index 0abd6051f..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvBuffer1x1.h
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_BUFFER_1X1_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_BUFFER_1X1_H__
-
-#include "open_cl/Buffer.h"
-#include "open_cl/ClKernel.h"
-#include "open_cl/kernels/ConvCommon.h"
-#include "open_cl/kernels/GpuOperation.h"
-#include "open_cl/kernels/Util.h"
-#include "open_cl/LinearStorage.h"
-#include "open_cl/Precision.h"
-#include "open_cl/InternalTensor.h"
-#include "open_cl/Util.h"
-#include "open_cl/DataType.h"
-#include "open_cl/Operations.h"
-#include "open_cl/Shape.h"
-#include "open_cl/Status.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/Types.h"
-#include "open_cl/WinogradUtil.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-class ConvBuffer1x1 : public GPUOperation
-{
-public:
- ConvBuffer1x1() = default;
-
- // Move only
- ConvBuffer1x1(ConvBuffer1x1 &&operation);
- ConvBuffer1x1 &operator=(ConvBuffer1x1 &&operation);
- ConvBuffer1x1(const ConvBuffer1x1 &) = delete;
- ConvBuffer1x1 &operator=(const ConvBuffer1x1 &) = delete;
-
- void GetPossibleKernelWorkGroups(TuningType tuning_type, const DeviceInfo &device_info,
- const KernelInfo &kernel_info,
- std::vector<int3> *work_groups) const override;
- int3 GetGridSize() const override;
-
- ConvWeightsDescription GetConvWeightsDescription() const
- {
- ConvWeightsDescription desc;
- desc.layout = ConvWeightsLayout::kOHWIOGroupI4O4;
- desc.output_group_size = conv_params_.block_size.z;
- return desc;
- }
-
- struct ConvParams
- {
- int3 block_size = int3(1, 1, 1);
- int element_size = 4; // can be 4, 8 or 16
-
- // By default a 2d convolution uses the same weights for all W/H positions, but in
- // some cases separate weights for the H dimension are needed, and the convolution
- // kernel requires only very small modifications to support this.
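// [Editor's annotation, not part of the upstream diff] The flag below is set by
// CreateConvBuffer1x1Wino4x4To6x6 (see the .cc above): with Winograd-transformed
// weights each output row Y needs its own weight slice, so the generated kernel
// offsets weights_cache by (Z * src_tensor.Height() + Y * block_size.z) *
// src_tensor.Slices() instead of Z * src_tensor.Slices().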
- bool different_weights_for_height = false; - }; - -private: - ConvBuffer1x1(const OperationDef &definition, const ConvParams &conv_params); - friend ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, const BHWC *shape); - friend ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo &device_info, - const OperationDef &definition, - const FullyConnectedAttributes &attr, const BHWC *shape); - friend ConvBuffer1x1 CreateConvBuffer1x1Wino4x4To6x6(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC *shape); - friend ConvBuffer1x1 CreateConvBuffer1x1DynamicWeights(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC &weights_shape, - const BHWC *dst_shape); - - template <DataType T> - void UploadData(const InternalTensor<OHWI, T> &weights, const InternalTensor<Linear, T> &biases); - template <DataType T> void UploadDataForWinograd4x4To6x6(const InternalTensor<OHWI, T> &weights); - - template <DataType T> void UploadWeights(const InternalTensor<OHWI, T> &weights); - - template <DataType T> void UploadBiases(const InternalTensor<Linear, T> &biases); - - std::string GenerateConvBuffer1x1(const OperationDef &op_def, - const ConvBuffer1x1::ConvParams &conv_params, Arguments *args); - - ConvParams conv_params_; -}; - -template <DataType T> -void ConvBuffer1x1::UploadData(const InternalTensor<OHWI, T> &weights, - const InternalTensor<Linear, T> &biases) -{ - UploadWeights(weights); - UploadBiases(biases); -} - -template <DataType T> -void ConvBuffer1x1::UploadDataForWinograd4x4To6x6(const InternalTensor<OHWI, T> &weights) -{ - InternalTensor<OHWI, T> wino_weights; - RearrangeWeightsToWinograd4x4To6x6Weights(weights, &wino_weights); - UploadWeights(wino_weights); - InternalTensor<Linear, DataType::FLOAT32> bias; - bias.shape = Linear(weights.shape.o); - bias.data.resize(weights.shape.o, 0.0f); - UploadBiases(bias); -} - -template <DataType T> void ConvBuffer1x1::UploadWeights(const InternalTensor<OHWI, T> &weights) -{ - const int dst_depth = DivideRoundUp(weights.shape.o, 4); - const int src_depth = DivideRoundUp(weights.shape.i, 4); - - const bool f32_weights = definition_.precision == CalculationsPrecision::F32; - const int float4_size = sizeof(float4); - // TODO - // f32_weights ? sizeof(float4) : sizeof(half4); - - const int dst_depth_aligned = AlignByN(dst_depth, conv_params_.block_size.z); - const int elements_count = weights.shape.h * weights.shape.w * src_depth * dst_depth_aligned * 4; - - BufferDescriptor desc; - desc.element_type = f32_weights ? 
DataType::FLOAT32 : DataType::FLOAT16; - desc.element_size = 16; - desc.memory_type = MemoryType::GLOBAL; - desc.size = float4_size * elements_count; - desc.data.resize(desc.size); - - if (f32_weights) - { - float4 *ptr = reinterpret_cast<float4 *>(desc.data.data()); - RearrangeWeightsToOHWIOGroupI4O4(weights, conv_params_.block_size.z, - absl::MakeSpan(ptr, elements_count)); - } - // else - // { - // half4 *ptr = reinterpret_cast<half4 *>(desc.data.data()); - // RearrangeWeightsToOHWIOGroupI4O4(weights, conv_params_.block_size.z, - // absl::MakeSpan(ptr, elements_count)); - // } - - args_.AddObject("weights", absl::make_unique<BufferDescriptor>(std::move(desc))); -} - -template <DataType T> void ConvBuffer1x1::UploadBiases(const InternalTensor<Linear, T> &biases) -{ - TensorLinearDescriptor desc; - desc.storage_type = LinearStorageType::BUFFER; - desc.element_type = definition_.GetDataType(); - int depth = AlignByN(biases.shape.v, 4 * conv_params_.block_size.z) / 4; - desc.UploadLinearData(biases, depth); - args_.AddObject("biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc))); -} - -bool IsConvBuffer1x1Supported(const OperationDef &definition, const Convolution2DAttributes &attr); - -bool IsConvBuffer1x1Supported(const OperationDef &definition, const BHWC &weights_shape, - const Convolution2DAttributes &attr); - -ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution2DAttributes &attr, const BHWC *shape = nullptr); - -ConvBuffer1x1 CreateConvBuffer1x1(const DeviceInfo &device_info, const OperationDef &definition, - const FullyConnectedAttributes &attr, - const BHWC *shape = nullptr); - -ConvBuffer1x1 CreateConvBuffer1x1DynamicWeights(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC &weights_shape, - const BHWC *dst_shape = nullptr); - -ConvBuffer1x1 CreateConvBuffer1x1Wino4x4To6x6(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC *shape = nullptr); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_BUFFER_1X1_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvConstants.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvConstants.cc deleted file mode 100644 index 0a51bab5c..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvConstants.cc +++ /dev/null @@ -1,282 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "open_cl/kernels/ConvConstants.h" - -#include <string> -#include <utility> - -#include "open_cl/kernels/Util.h" -#include "open_cl/kernels/WorkGroupPicking.h" -#include "open_cl/Precision.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ -// Adreno can provide up to ~3-4KB of constant memory, but in some cases even -// 3KB can have very bad performance. -int GetAdrenoOptimalMaxConstantSize(int gpu_version) -{ - if (gpu_version < 600) - { - return 256 * 10; // 2.5KB - } - else - { - return 256 * 14; // 3.5KB - } -} - -int GetOptimalMaxConstantSize(const DeviceInfo &info) -{ - if (!info.IsAdreno()) - { - // In general we do not expect that this kernel will be used with non Adreno - // so as it tuned for __constant memory that have big profit on Adreno - return 1024; // 1KB - } - else - { - return GetAdrenoOptimalMaxConstantSize(info.adreno_info.gpu_version); - } -} - -std::string GenerateConvolutionConstantCode(const OperationDef &op_def, const OHWI &weights_shape, - bool stride_correction, GPUOperation *op) -{ - auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); - if (op_def.IsBatchSupported()) - { - src_desc.SetStateVar("BatchedWidth", "true"); - } - op->AddSrcTensor("src_tensor", src_desc); - - auto dst_desc = op_def.dst_tensors[0]; - if (op_def.IsBatchSupported()) - { - dst_desc.SetStateVar("BatchedWidth", "true"); - } - op->AddDstTensor("dst_tensor", dst_desc); - - std::string c = GetCommonDefines(op_def.precision); - - const int out_z = DivideRoundUp(weights_shape.o, 4); - const std::string kOutZ = std::to_string(out_z); - const int src_depth = DivideRoundUp(weights_shape.i, 4); - - const auto src_tensor_type = op_def.src_tensors[0].storage_type; - const bool manual_clamp = src_tensor_type == TensorStorageType::BUFFER || - src_tensor_type == TensorStorageType::IMAGE_BUFFER; - - switch (op_def.precision) - { - case CalculationsPrecision::F32: - case CalculationsPrecision::F16: - c += "#define CONV4(R, SRC, F, i) \\\n"; - c += " R += SRC.x * F[i + 0]; \\\n"; - c += " R += SRC.y * F[i + 1]; \\\n"; - c += " R += SRC.z * F[i + 2]; \\\n"; - c += " R += SRC.w * F[i + 3]; \n"; - - c += "#define CONV3(R, SRC, F, i) \\\n"; - c += " R += SRC.x * F[i + 0]; \\\n"; - c += " R += SRC.y * F[i + 1]; \\\n"; - c += " R += SRC.z * F[i + 2]; \n"; - - c += "#define CONV2(R, SRC, F, i) \\\n"; - c += " R += SRC.x * F[i + 0]; \\\n"; - c += " R += SRC.y * F[i + 1]; \n"; - - c += "#define CONV1(R, SRC, F, i) \\\n"; - c += " R += SRC * F[i + 0]; \n"; - break; - case CalculationsPrecision::F32_F16: - c += "#define CONV4(R, SRC, F, i) \\\n"; - c += " R += convert_float4(SRC.x * F[i + 0] + SRC.y * F[i + 1]"; - c += " + SRC.z * F[i + 2] + SRC.w * F[i + 3]);\n"; - - c += "#define CONV3(R, SRC, F, i) \\\n"; - c += " R += convert_float4(SRC.x * F[i + 0] + SRC.y * F[i + 1]"; - c += " + SRC.z * F[i + 2]);\n"; - - c += "#define CONV2(R, SRC, F, i) \\\n"; - c += " R += convert_float4(SRC.x * F[i + 0] + SRC.y * F[i + 1]);\n"; - - c += "#define CONV1(R, SRC, F, i) \\\n"; - c += " R += convert_float4(SRC * F[i + 0]);\n"; - break; - } - - const std::string postfixes[] = {".x", ".xy", ".xyz", ""}; - - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - c += " int X = get_global_id(0);\n"; - c += " int Y = get_global_id(1);\n"; - c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height()) " - "return;\n"; - if (stride_correction) - { - c += " int start_x = " + - GetXStrideCorrectedV2("X", 
"args.src_tensor.Batch()", "args.stride_x", "args.padding_x") + - ";\n"; - } - else - { - if (op_def.IsBatchSupported()) - { - c += " int start_x = X * args.stride_x + args.padding_x * " - "args.src_tensor.Batch();\n"; - } - else - { - c += " int start_x = X * args.stride_x + args.padding_x;\n"; - } - } - c += " int start_y = Y * args.stride_y + args.padding_y;\n"; - c += " ACCUM_FLT4 r[" + kOutZ + "];\n"; - c += " for (int i = 0; i < " + kOutZ + "; ++i) {\n"; - c += " r[i] = (ACCUM_FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n"; - c += " }\n"; - int filters_counter = 0; - for (int s = 0; s < src_depth; ++s) - { - const int ch_count = std::min(4, weights_shape.i - s * 4); - const std::string s_conv = "CONV" + std::to_string(ch_count); - const std::string s_count = ch_count == 1 ? "" : std::to_string(ch_count); - const std::string s_type = absl::StrCat("FLT", s_count); - const std::string s_postfix = postfixes[ch_count - 1]; - const std::string dilation_x = - op_def.IsBatchSupported() ? "args.dilation_x * args.src_tensor.Batch()" : "args.dilation_x"; - for (int ky = 0; ky < weights_shape.h; ++ky) - { - std::string s_y = absl::StrCat("(start_y + ", ky, " * args.dilation_y)"); - if (manual_clamp) - { - c += " {\n"; - c += " bool y_out = " + s_y + " < 0 || " + s_y + " >= args.src_tensor.Height();\n"; - } - for (int kx = 0; kx < weights_shape.w; ++kx) - { - c += " {\n"; - std::string s_x = absl::StrCat("(start_x + ", kx, " * " + dilation_x + ")"); - if (manual_clamp) - { - c += " bool x_out = " + s_x + "< 0 || " + s_x + ">= args.src_tensor.Width();\n"; - c += " " + s_type + " src = x_out || y_out ?"; - c += "(" + s_type + ")(0.0) : args.src_tensor.Read(" + s_x + ", " + s_y + ", " + - std::to_string(s) + ")" + s_postfix + ";\n"; - } - else - { - c += " " + s_type + " src = args.src_tensor.Read(" + s_x + ", " + s_y + ", " + - std::to_string(s) + ")" + s_postfix + ";\n"; - } - for (int d = 0; d < out_z; ++d) - { - c += " " + s_conv + "(r[" + std::to_string(d) + "], src, args.weigths.GetPtr(),"; - c += " " + std::to_string(filters_counter) + ");\n"; - filters_counter += ch_count; - } - c += " }\n"; - } - if (manual_clamp) - { - c += " }\n"; - } - } - } - for (int i = 0; i < out_z; ++i) - { - std::string s_i = std::to_string(i); - c += " {\n"; - c += " FLT4 res = TO_FLT4(r[" + s_i + "]) + args.biases.Read(" + s_i + ");\n"; - c += " args.dst_tensor.Write(res, X, Y, " + s_i + ");\n"; - c += " }\n"; - } - c += "}\n"; - return c; -} - -} // namespace - -bool IsConvConstantsSupported(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution2DAttributes &attr) -{ - if (device_info.IsAMD() && definition.precision != CalculationsPrecision::F32 && - definition.src_tensors[0].storage_type != TensorStorageType::BUFFER) - { - // BUG, some AMD gpus crashe without it - return false; - } - - const auto &w_shape = attr.weights.shape; - const int dst_channels = AlignByN(w_shape.o, 4); - const int filters_count = w_shape.i * dst_channels * w_shape.h * w_shape.w; - const int float_size = sizeof(float); - // TODO F32 and F16 - // definition.precision == CalculationsPrecision::F32 ? 
sizeof(float) : sizeof(half); - const int filters_buffer_size = filters_count * float_size; - const int kConstantMaxSize = GetOptimalMaxConstantSize(device_info); - const int flt4_registers = DivideRoundUp(w_shape.o, 4); - return filters_buffer_size <= kConstantMaxSize && flt4_registers <= 8; -} - -GPUOperation CreateConvConstants(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution2DAttributes &attr) -{ - GPUOperation op(definition); - UploadWeightsForConvConstants(attr.weights, definition.precision, &op); - op.args_.AddInt("stride_x", attr.strides.w); - op.args_.AddInt("stride_y", attr.strides.h); - op.args_.AddInt("padding_x", -attr.padding.prepended.w); - op.args_.AddInt("padding_y", -attr.padding.prepended.h); - op.args_.AddInt("dilation_x", attr.dilations.w); - op.args_.AddInt("dilation_y", attr.dilations.h); - op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_ZIs1; - - const bool stride_correction = definition.IsBatchSupported() && attr.strides.w != 1; - op.code_ = - GenerateConvolutionConstantCode(definition, attr.weights.shape, stride_correction, &op); - if (definition.precision == CalculationsPrecision::F16 && device_info.IsAdreno3xx()) - { - op.compiler_options_.push_back(CompilerOptions::ADRENO_FULL_SIMD_LINE); - } - if (definition.precision != CalculationsPrecision::F32 && device_info.IsPowerVR()) - { - // BUG, some PowerVRs (GE8320) produce incorrect result without it - op.compiler_options_.push_back(CompilerOptions::CL_OPT_DISABLE); - } - - TensorLinearDescriptor desc; - desc.storage_type = LinearStorageType::BUFFER; - desc.element_type = definition.GetDataType(); - desc.memory_type = MemoryType::CONSTANT; - desc.UploadLinearData(attr.bias); - op.args_.AddObject("biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc))); - return op; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvConstants.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvConstants.h deleted file mode 100644 index be6670c53..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvConstants.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_CONSTANTS_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_CONSTANTS_H__ - -#include "open_cl/Buffer.h" -#include "open_cl/kernels/GpuOperation.h" -#include "open_cl/LinearStorage.h" -#include "open_cl/Tensor.h" -#include "open_cl/Util.h" -#include "open_cl/DataType.h" -#include "open_cl/Operations.h" -#include "open_cl/Shape.h" -#include "open_cl/Status.h" -#include "open_cl/Tensor.h" -#include "open_cl/Types.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -template <DataType S, typename T> -void RearrangeWeightsForConvConstants(const InternalTensor<OHWI, S> &weights, absl::Span<T> dst) -{ - const int dst_depth = DivideRoundUp(weights.shape.o, 4); - const int src_depth = DivideRoundUp(weights.shape.i, 4); - const int kernel_x = weights.shape.w; - const int kernel_y = weights.shape.h; - - int counter = 0; - for (int s = 0; s < src_depth; ++s) - { - for (int y = 0; y < kernel_y; ++y) - { - for (int x = 0; x < kernel_x; ++x) - { - for (int d = 0; d < dst_depth; ++d) - { - const int channels_count = std::min(4, weights.shape.i - s * 4); - T filters[4]; - for (int i = 0; i < 4; ++i) - { - for (int j = 0; j < channels_count; ++j) - { - const int s_ch = s * 4 + j; - const int d_ch = d * 4 + i; - if (s_ch < weights.shape.i && d_ch < weights.shape.o) - { - const int f_index = weights.shape.LinearIndex({d_ch, y, x, s_ch}); - filters[i][j] = weights.data[f_index]; - } - else - { - filters[i][j] = 0.0f; - } - } - } - T filters_new[4]; - for (int i = 0; i < 4; ++i) - { - for (int j = 0; j < 4; ++j) - { - filters_new[i][j] = filters[j][i]; - } - } - for (int i = 0; i < channels_count; ++i) - { - dst[counter++] = filters_new[i]; - } - } - } - } - } -} - -template <DataType T> -void UploadWeightsForConvConstants(const InternalTensor<OHWI, T> &weights, - CalculationsPrecision precision, GPUOperation *op) -{ - const int dst_depth = DivideRoundUp(weights.shape.o, 4); - const int kernel_x = weights.shape.w; - const int kernel_y = weights.shape.h; - - const bool f32_weights = precision == CalculationsPrecision::F32; - const int float_size = f32_weights ? 4 : 2; - const int float_count = weights.shape.i * dst_depth * 4 * kernel_x * kernel_y; - - BufferDescriptor desc; - desc.element_type = f32_weights ? 
DataType::FLOAT32 : DataType::FLOAT16; - desc.element_size = 4; - desc.memory_type = MemoryType::CONSTANT; - desc.size = float_size * float_count; - desc.data.resize(desc.size); - - if (f32_weights) - { - float4 *ptr = reinterpret_cast<float4 *>(desc.data.data()); - RearrangeWeightsForConvConstants(weights, absl::MakeSpan(ptr, float_count / 4)); - } - // else - // { - // half4 *ptr = reinterpret_cast<half4 *>(desc.data.data()); - // RearrangeWeightsForConvConstants(weights, absl::MakeSpan(ptr, float_count / 4)); - // } - - op->args_.AddObject("weigths", absl::make_unique<BufferDescriptor>(std::move(desc))); -} - -bool IsConvConstantsSupported(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution2DAttributes &attr); - -GPUOperation CreateConvConstants(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution2DAttributes &attr); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_CONSTANTS_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvPowervr.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvPowervr.cc deleted file mode 100644 index 5cb0c2719..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvPowervr.cc +++ /dev/null @@ -1,1653 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "open_cl/kernels/ConvPowervr.h" - -#include <algorithm> -#include <string> -#include <utility> - -#include "absl/strings/substitute.h" -#include "open_cl/kernels/Util.h" -#include "open_cl/kernels/WorkGroupPicking.h" -#include "open_cl/Precision.h" -#include "open_cl/TensorType.h" -#include "open_cl/DataType.h" -#include "open_cl/Shape.h" -#include "open_cl/Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ -std::string GenerateUploadByThreads(const std::string &local_ptr_name, - const std::string &global_ptr_name, - const std::string &global_offset_name, - const std::string &lid_name, int total_work_items, - int elements_to_upload) -{ - std::string c; - std::string offset = global_offset_name.empty() ? 
"" : global_offset_name + " + "; - const int groups = elements_to_upload / total_work_items; - const int reminder = elements_to_upload % total_work_items; - for (int i = 0; i < groups; ++i) - { - c += " " + local_ptr_name + "[" + lid_name + " + " + std::to_string(total_work_items * i) + - "] = " + global_ptr_name + "[" + offset + lid_name + " + " + - std::to_string(total_work_items * i) + "];\n"; - } - if (reminder != 0) - { - c += " if (" + lid_name + " < " + std::to_string(reminder) + ") {\n"; - c += " " + local_ptr_name + "[" + lid_name + " + " + - std::to_string(total_work_items * groups) + "] = " + global_ptr_name + "[" + offset + - lid_name + " + " + std::to_string(total_work_items * groups) + "];\n"; - c += " }\n"; - } - return c; -} - -std::string GenerateAsyncUpload(const std::string &local_ptr_name, - const std::string &global_ptr_name, - const std::string &global_offset_name, int elements_to_upload) -{ - std::string c; - std::string offset = global_offset_name.empty() ? "" : " + " + global_offset_name; - c += " async_work_group_copy(" + local_ptr_name + ", " + global_ptr_name + offset + ", " + - std::to_string(elements_to_upload) + ", 0);\n"; - return c; -} - -std::string GenerateBlockCoords(const int4 &block_size, const int3 &work_group_launch_order, - bool linear_spatial, bool need_depth) -{ - std::string c; - int3 launch_remap; - launch_remap[work_group_launch_order.x] = 0; - launch_remap[work_group_launch_order.y] = 1; - launch_remap[work_group_launch_order.z] = 2; - if (linear_spatial) - { - if (work_group_launch_order[0] == 0) - { - c += " int linear_spatial = get_global_id(0);\n"; - } - else - { - c += " int linear_spatial = get_group_id(" + std::to_string(launch_remap[0]) + - ") * get_local_size(0) + get_local_id(0);\n"; - } - if (need_depth) - { - c += " int DST_X = (linear_spatial % args.task_size_x) * " + std::to_string(block_size.x) + - ";\n"; - c += " linear_spatial = linear_spatial / args.task_size_x;\n"; - c += " int DST_Y = (linear_spatial % args.task_size_y) * " + std::to_string(block_size.y) + - ";\n"; - c += " int DST_Z = (linear_spatial / args.task_size_y) * " + std::to_string(block_size.z) + - ";\n"; - } - else - { - c += " int DST_Y = (linear_spatial / args.task_size_x) * " + std::to_string(block_size.y) + - ";\n"; - c += " int DST_X = (linear_spatial % args.task_size_x) * " + std::to_string(block_size.x) + - ";\n"; - } - if (work_group_launch_order[1] == 1) - { - c += " int DST_S = get_global_id(1) * " + std::to_string(block_size.w) + ";\n"; - } - else - { - c += " int DST_S = (get_group_id(" + std::to_string(launch_remap[1]) + - ") * get_local_size(1) + get_local_id(1)) * " + std::to_string(block_size.w) + ";\n"; - } - } - else - { - if (work_group_launch_order[0] == 0) - { - c += " int DST_X = get_global_id(0) * " + std::to_string(block_size.x) + ";\n"; - } - else - { - c += " int DST_X = (get_group_id(" + std::to_string(launch_remap[0]) + - ") * get_local_size(0) + get_local_id(0)) * " + std::to_string(block_size.x) + ";\n"; - } - std::string global_id_1; - if (work_group_launch_order[1] == 1) - { - global_id_1 = "get_global_id(1)"; - } - else - { - global_id_1 = "(get_group_id(" + std::to_string(launch_remap[1]) + - ") * get_local_size(1) + get_local_id(1))"; - } - if (need_depth) - { - c += " int linear_id_1 = " + global_id_1 + ";\n"; - c += - " int DST_Z = (linear_id_1 / args.task_size_y) * " + std::to_string(block_size.z) + ";\n"; - c += - " int DST_Y = (linear_id_1 % args.task_size_y) * " + std::to_string(block_size.y) + ";\n"; - } - else - { - c 
+= " int DST_Y = " + global_id_1 + " * " + std::to_string(block_size.y) + ";\n"; - } - if (work_group_launch_order[2] == 2) - { - c += " int DST_S = get_global_id(2) * " + std::to_string(block_size.w) + ";\n"; - } - else - { - c += " int DST_S = (get_group_id(" + std::to_string(launch_remap[2]) + - ") * get_local_size(2) + get_local_id(2)) * " + std::to_string(block_size.w) + ";\n"; - } - } - - return c; -} -} // namespace - -ConvPowerVR::ConvPowerVR(const OperationDef &definition, const Convolution2DAttributes &attr, - const DeviceInfo &device_info, const BHWC *dst_shape) - : GPUOperation(definition), stride_(attr.strides.w, attr.strides.h, 1, 1), - padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, 0, 0), - kernel_size_(attr.weights.shape.w, attr.weights.shape.h, 1, 1), - dilation_(attr.dilations.w, attr.dilations.h, 1, 1), - conv_params_(GuessBestParams(device_info, definition, attr, dst_shape)) -{ -} - -ConvPowerVR::ConvPowerVR(const OperationDef &definition, const Convolution2DAttributes &attr, - const BHWC &weights_shape, const DeviceInfo &device_info, - const BHWC *dst_shape) - : GPUOperation(definition), stride_(attr.strides.w, attr.strides.h, 1, 1), - padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, 0, 0), - kernel_size_(weights_shape.w, weights_shape.h, 1, 1), - dilation_(attr.dilations.w, attr.dilations.h, 1, 1), - conv_params_(GuessBestParams(device_info, definition, attr, weights_shape, dst_shape)) -{ -} - -ConvPowerVR::ConvPowerVR(const OperationDef &definition, const FullyConnectedAttributes &attr, - const DeviceInfo &device_info, const BHWC *dst_shape) - : GPUOperation(definition), stride_(1, 1, 1, 1), padding_(0, 0, 0, 0), kernel_size_(1, 1, 1, 1), - dilation_(1, 1, 1, 1), conv_params_(GuessBestParams(device_info, definition, attr, dst_shape)) -{ -} - -ConvPowerVR::ConvPowerVR(const OperationDef &definition) - : GPUOperation(definition), stride_(1, 1, 1, 1), padding_(0, 0, 0, 0), kernel_size_(1, 1, 1, 1), - dilation_(1, 1, 1, 1) -{ -} - -ConvPowerVR::ConvPowerVR(ConvPowerVR &&operation) - : GPUOperation(std::move(operation)), stride_(operation.stride_), padding_(operation.padding_), - kernel_size_(operation.kernel_size_), dilation_(operation.dilation_), - conv_params_(operation.conv_params_) -{ -} - -ConvPowerVR::ConvPowerVR(const OperationDef &definition, const Convolution3DAttributes &attr, - const DeviceInfo &device_info, const BHWDC *dst_shape) - : GPUOperation(definition), stride_(attr.strides.w, attr.strides.h, attr.strides.d, 1), - padding_(-attr.padding.prepended.w, -attr.padding.prepended.h, -attr.padding.prepended.d, 0), - kernel_size_(attr.weights.shape.w, attr.weights.shape.h, attr.weights.shape.d, 1), - dilation_(attr.dilations.w, attr.dilations.h, attr.dilations.d, 1), - conv_params_(GuessBestParams(device_info, definition, attr, dst_shape)) -{ -} - -ConvPowerVR &ConvPowerVR::operator=(ConvPowerVR &&operation) -{ - if (this != &operation) - { - std::swap(stride_, operation.stride_); - std::swap(padding_, operation.padding_); - std::swap(kernel_size_, operation.kernel_size_); - std::swap(dilation_, operation.dilation_); - std::swap(conv_params_, operation.conv_params_); - GPUOperation::operator=(std::move(operation)); - } - return *this; -} - -void ConvPowerVR::GenerateCode(const DeviceInfo &device_info) -{ - if (conv_params_.linear_spatial) - { - grid_dimension_ = 2; - } - const bool stride_correction = definition_.IsBatchSupported() && stride_.x != 1; - code_ = GenerateConv(device_info, definition_, stride_correction, conv_params_); 
- if (definition_.precision == CalculationsPrecision::F16 && device_info.IsPowerVR()) - { - compiler_options_.push_back(CompilerOptions::POWERVR_FP16); - } - if (conv_params_.IsPrivateMemBroadcast() && device_info.IsCL20OrHigher()) - { - compiler_options_.push_back(CompilerOptions::CL_2_0); - } - bool kernel_is_trivial = conv_params_.x_kernel_is_1 && conv_params_.y_kernel_is_1; - if (definition_.src_tensors[0].HasAxis(Axis::DEPTH)) - { - kernel_is_trivial = kernel_is_trivial & conv_params_.z_kernel_is_1; - } - if (device_info.IsAdreno3xx() && definition_.precision == CalculationsPrecision::F16 && - kernel_is_trivial) - { - compiler_options_.push_back(CompilerOptions::ADRENO_FULL_SIMD_LINE); - } -} - -absl::Status ConvPowerVR::BindArguments(ArgumentsBinder *args) -{ - if (!conv_params_.x_kernel_is_1) - { - RETURN_IF_ERROR(args->SetInt("stride_x", stride_.x)); - RETURN_IF_ERROR(args->SetInt("padding_x", padding_.x * src_[0]->Batch())); - RETURN_IF_ERROR(args->SetInt("kernel_size_x", kernel_size_.x)); - RETURN_IF_ERROR(args->SetInt("dilation_x", dilation_.x * src_[0]->Batch())); - } - if (!conv_params_.y_kernel_is_1) - { - RETURN_IF_ERROR(args->SetInt("stride_y", stride_.y)); - RETURN_IF_ERROR(args->SetInt("padding_y", padding_.y)); - RETURN_IF_ERROR(args->SetInt("kernel_size_y", kernel_size_.y)); - RETURN_IF_ERROR(args->SetInt("dilation_y", dilation_.y)); - } - if (definition_.src_tensors[0].HasAxis(Axis::DEPTH) && !conv_params_.z_kernel_is_1) - { - RETURN_IF_ERROR(args->SetInt("stride_z", stride_.z)); - RETURN_IF_ERROR(args->SetInt("padding_z", padding_.z)); - RETURN_IF_ERROR(args->SetInt("kernel_size_z", kernel_size_.z)); - RETURN_IF_ERROR(args->SetInt("dilation_z", dilation_.z)); - } - if (conv_params_.linear_spatial) - { - const int grid_x = - DivideRoundUp(dst_[0]->Width() * dst_[0]->Batch(), conv_params_.block_size.x); - RETURN_IF_ERROR(args->SetInt("task_size_x", grid_x)); - } - if (definition_.src_tensors[0].HasAxis(Axis::DEPTH)) - { - const int task_size_y = DivideRoundUp(dst_[0]->Height(), conv_params_.block_size.y); - RETURN_IF_ERROR(args->SetInt("task_size_y", task_size_y)); - } - return absl::OkStatus(); -} - -int3 ConvPowerVR::GetGridSize() const -{ - const int task_size_x = - DivideRoundUp(dst_[0]->Width() * dst_[0]->Batch(), conv_params_.block_size.x); - const int task_size_y = DivideRoundUp(dst_[0]->Height(), conv_params_.block_size.y); - const int task_size_z = DivideRoundUp(dst_[0]->Depth(), conv_params_.block_size.z); - const int task_size_s = DivideRoundUp(dst_[0]->Slices(), conv_params_.block_size.w); - int3 wg; - - if (conv_params_.linear_spatial) - { - int grid_x = task_size_x * task_size_y; - if (definition_.src_tensors[0].HasAxis(Axis::DEPTH)) - { - grid_x *= task_size_z; - } - return int3(grid_x, task_size_s, 1); - } - else - { - int grid_y = task_size_y; - if (definition_.src_tensors[0].HasAxis(Axis::DEPTH)) - { - grid_y *= task_size_z; - } - return int3(task_size_x, grid_y, task_size_s); - } -} - -void ConvPowerVR::GetPossibleKernelWorkGroups(TuningType tuning_type, const DeviceInfo &device_info, - const KernelInfo &kernel_info, - std::vector<int3> *work_groups) const -{ - if (conv_params_.weights_upload_type == WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP || - conv_params_.weights_upload_type == WeightsUploadType::LOCAL_MEM_BY_THREADS || - conv_params_.fixed_work_group_size) - { - work_groups->push_back(work_group_size_); - return; - } - GetPossibleWorkGroupsConv(tuning_type, device_info, kernel_info, grid_size_, work_groups); -} - -std::string 
ConvPowerVR::GenerateConv(const DeviceInfo &device_info, const OperationDef &op_def, - bool stride_correction, const ConvParams &conv_params) -{ - auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); - if (op_def.IsBatchSupported()) - { - src_desc.SetStateVar("BatchedWidth", "true"); - } - AddSrcTensor("src_tensor", src_desc); - if (op_def.src_tensors.size() == 2) - { - // dynamic weights - BufferDescriptor desc; - desc.element_type = op_def.src_tensors[1].data_type; - desc.element_size = 4; - desc.memory_type = - conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::CONSTANT_MEM - ? MemoryType::CONSTANT - : MemoryType::GLOBAL; - - AddSrcBuffer("weights", desc); - } - - const auto &src_def = op_def.src_tensors[0]; - - auto generate_id = [&](const std::string &x, const std::string &y, const std::string &z) { - std::string id; - if (src_def.HasAxis(Axis::WIDTH)) - { - id += "_w" + x; - } - if (src_def.HasAxis(Axis::HEIGHT)) - { - id += "_h" + y; - } - if (src_def.HasAxis(Axis::DEPTH)) - { - id += "_d" + z; - } - return id; - }; - - auto generate_id_full = [&](const std::string &x, const std::string &y, const std::string &z, - const std::string &s) { return generate_id(x, y, z) + "_s" + s; }; - - auto generate_check = [&](const std::string &x, const std::string &y, const std::string &z) { - std::string check; - const std::vector<Axis> axes{Axis::WIDTH, Axis::HEIGHT, Axis::DEPTH}; - const std::vector<std::string> names{"in_x", "in_y", "in_z"}; - const std::vector<bool> is_1{conv_params_.x_kernel_is_1, conv_params_.y_kernel_is_1, - conv_params_.z_kernel_is_1}; - const std::vector<std::string> coords{x, y, z}; - for (size_t i = 0; i < axes.size(); ++i) - { - const auto &axis = axes[i]; - if (src_def.HasAxis(axis) && !src_def.SupportsZeroClamp(axis) && !is_1[i]) - { - if (!check.empty()) - { - check += " && "; - } - check += names[i] + coords[i]; - } - } - return check; - }; - - auto dst_desc = op_def.dst_tensors[0]; - if (op_def.IsBatchSupported()) - { - dst_desc.SetStateVar("BatchedWidth", "true"); - } - AddDstTensor("dst_tensor", dst_desc); - - if (!conv_params_.x_kernel_is_1) - { - args_.AddInt("stride_x"); - args_.AddInt("padding_x"); - args_.AddInt("kernel_size_x"); - args_.AddInt("dilation_x"); - } - if (!conv_params_.y_kernel_is_1) - { - args_.AddInt("stride_y"); - args_.AddInt("padding_y"); - args_.AddInt("kernel_size_y"); - args_.AddInt("dilation_y"); - } - if (src_def.HasAxis(Axis::DEPTH) && !conv_params_.z_kernel_is_1) - { - args_.AddInt("stride_z"); - args_.AddInt("padding_z"); - args_.AddInt("kernel_size_z"); - args_.AddInt("dilation_z"); - } - if (conv_params_.linear_spatial) - { - args_.AddInt("task_size_x"); - } - if (src_def.HasAxis(Axis::DEPTH)) - { - args_.AddInt("task_size_y"); - } - - const bool need_local_mem = - conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_BY_THREADS || - conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP; - - const int local_mem_size = conv_params.block_size.w * 4 * conv_params.src_depth_loop_size; - - const bool use_simd_broadcast = conv_params.IsPrivateMemBroadcast(); - const int simd_size = conv_params.simd_size; - - const bool late_oob_check = need_local_mem || use_simd_broadcast; - - const std::string weights_space = - conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::CONSTANT_MEM ? 
"__constant" - : "__global"; - - const std::string weights_data_type = - conv_params.weights_data_type == DataType::FLOAT32 ? "float4" : "half4"; - - const std::string weights_global_ptr = weights_space + " " + weights_data_type + "*"; - - std::string c = GetCommonDefines(op_def.precision); - if (use_simd_broadcast) - { - if (device_info.cl_version == OpenCLVersion::CL_2_0) - { - c += "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"; - } - else if (device_info.SupportsExtension("cl_intel_subgroups")) - { - c += "#pragma OPENCL EXTENSION cl_intel_subgroups : enable\n"; - } - } - const int4 block_size = conv_params.block_size; - if (conv_params.fixed_work_group_size) - { - c += "__attribute__((reqd_work_group_size(" + std::to_string(work_group_size_.x) + ", " + - std::to_string(work_group_size_.y) + ", " + std::to_string(work_group_size_.z) + ")))\n"; - } - if (use_simd_broadcast && device_info.IsIntel()) - { - c += "__attribute__((intel_reqd_sub_group_size(" + std::to_string(simd_size) + ")))\n"; - } - std::string dst_oob_check; - if (src_def.HasAxis(Axis::DEPTH)) - { - if (conv_params.linear_spatial) - { - dst_oob_check = "DST_Z >= args.dst_tensor.Depth() || DST_S >= " - "args.dst_tensor.Slices()"; - } - else - { - dst_oob_check = "DST_X >= args.dst_tensor.Width() || DST_Z >= " - "args.dst_tensor.Depth() || DST_S >= args.dst_tensor.Slices()"; - } - } - else - { - if (conv_params.linear_spatial) - { - dst_oob_check = "DST_Y >= args.dst_tensor.Height() || DST_S >= " - "args.dst_tensor.Slices()"; - } - else - { - dst_oob_check = "DST_X >= args.dst_tensor.Width() || DST_Y >= " - "args.dst_tensor.Height() || DST_S >= args.dst_tensor.Slices()"; - } - } - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - c += GenerateBlockCoords(conv_params.block_size, work_group_launch_order_, - conv_params.linear_spatial, src_def.HasAxis(Axis::DEPTH)); - if (!late_oob_check) - { - c += " if (" + dst_oob_check + ") {\n"; - c += " return;\n"; - c += " }\n"; - } - if (conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_BY_THREADS) - { - if (conv_params.linear_spatial) - { - c += " int lid = get_local_id(0);\n"; - } - else - { - c += " int lid = get_local_id(1) * " + std::to_string(work_group_size_.x) + - " + get_local_id(0);\n"; - } - } - if (use_simd_broadcast) - { - c += " int simd_id = get_sub_group_local_id();\n"; - } - for (int s = 0; s < block_size.w; ++s) - { - const std::string sind = std::to_string(s); - for (int z = 0; z < block_size.z; ++z) - { - const std::string zind = std::to_string(z); - for (int y = 0; y < block_size.y; ++y) - { - const std::string yind = std::to_string(y); - for (int x = 0; x < block_size.x; ++x) - { - const std::string xind = std::to_string(x); - c += " ACCUM_FLT4 r" + generate_id_full(xind, yind, zind, sind) + - " = (ACCUM_FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n"; - } - } - } - } - if (!conv_params_.x_kernel_is_1) - { - for (int x = 0; x < block_size.x; ++x) - { - const std::string xind = std::to_string(x); - const std::string xc = "(DST_X + " + xind + ")"; - if (stride_correction) - { - c += " int xc" + xind + " = " + - GetXStrideCorrected(xc, "args.src_tensor.Batch()", "args.stride_x", "args.padding_x") + - ";\n"; - } - else - { - c += " int xc" + xind + " = " + xc + " * args.stride_x + args.padding_x;\n"; - } - } - } - else - { - for (int x = 0; x < block_size.x; ++x) - { - const std::string xind = std::to_string(x); - c += " int xc" + xind + " = DST_X + " + xind + ";\n"; - if (!src_def.CanReadOutOfBorder(Axis::WIDTH)) - { - c += " xc" + xind 
+ " = clamp(xc" + xind + ", 0, args.src_tensor.Width() - 1);\n"; - } - } - } - if (!conv_params_.y_kernel_is_1) - { - for (int y = 0; y < block_size.y; ++y) - { - const std::string yind = std::to_string(y); - const std::string yc = "(DST_Y + " + yind + ")"; - c += " int yc" + yind + " = " + yc + " * args.stride_y + args.padding_y;\n"; - } - } - else - { - for (int y = 0; y < block_size.y; ++y) - { - const std::string yind = std::to_string(y); - c += " int yc" + yind + " = DST_Y + " + yind + ";\n"; - if (!src_def.CanReadOutOfBorder(Axis::HEIGHT)) - { - c += " yc" + yind + " = clamp(yc" + yind + ", 0, args.src_tensor.Height() - 1);\n"; - } - } - } - if (src_def.HasAxis(Axis::DEPTH)) - { - if (!conv_params_.z_kernel_is_1) - { - for (int z = 0; z < block_size.z; ++z) - { - const std::string zind = std::to_string(z); - const std::string zc = "(DST_Z + " + zind + ")"; - c += " int zc" + zind + " = " + zc + " * args.stride_z + args.padding_z;\n"; - } - } - else - { - for (int z = 0; z < block_size.z; ++z) - { - const std::string zind = std::to_string(z); - c += " int zc" + zind + " = DST_Z + " + zind + ";\n"; - if (!src_def.CanReadOutOfBorder(Axis::DEPTH)) - { - c += " zc" + zind + " = clamp(zc" + zind + ", 0, args.src_tensor.Depth() - 1);\n"; - } - } - } - } - bool trivial_kernel_size = conv_params_.x_kernel_is_1 && conv_params_.y_kernel_is_1; - if (src_def.HasAxis(Axis::DEPTH)) - { - trivial_kernel_size = trivial_kernel_size && conv_params_.z_kernel_is_1; - } - if (need_local_mem) - { - c += " __local " + weights_data_type + " weights_cache[" + std::to_string(local_mem_size) + - "];\n"; - } - else if (conv_params.AreWeightsBuffer()) - { - c += " " + weights_global_ptr + " weights_cache;\n"; - } - else if (!trivial_kernel_size) - { - c += " int filter_offset = 0;\n"; - } - if (conv_params.AreWeightsBuffer()) - { - if (conv_params.different_weights_for_height) - { - c += " " + weights_global_ptr + - " filters_loc = args.weights.GetPtr() + (DST_S * " - "args.src_tensor.Height() + DST_Y * " + - std::to_string(block_size.w) + ") * 4 * args.src_tensor.Slices();\n"; - } - else - { - std::string kernel_spatial_offset = ""; - if (!conv_params_.x_kernel_is_1) - { - kernel_spatial_offset += " * args.kernel_size_x"; - } - if (!conv_params_.y_kernel_is_1) - { - kernel_spatial_offset += " * args.kernel_size_y"; - } - if (src_def.HasAxis(Axis::DEPTH) && !conv_params_.z_kernel_is_1) - { - kernel_spatial_offset += " * args.kernel_size_z"; - } - c += " " + weights_global_ptr + - " filters_loc = args.weights.GetPtr() + DST_S * 4 * " - "args.src_tensor.Slices()" + - kernel_spatial_offset + ";\n"; - } - } - if (src_def.HasAxis(Axis::DEPTH) && !conv_params_.z_kernel_is_1) - { - c += " for (int kz = 0; kz < args.kernel_size_z; ++kz) {\n"; - for (int z = 0; z < block_size.z; ++z) - { - const std::string zck = "zck" + std::to_string(z); - c += " int zck" + std::to_string(z) + " = kz * args.dilation_z + zc" + std::to_string(z) + - ";\n"; - if (!src_def.SupportsZeroClamp(Axis::DEPTH)) - { - c += " bool in_z" + std::to_string(z) + " = " + zck + " >= 0 && " + zck + - " < args.src_tensor.Depth();\n"; - if (!src_def.CanReadOutOfBorder(Axis::DEPTH)) - { - c += " " + zck + " = clamp(" + zck + ", 0, args.src_tensor.Depth() - 1);\n"; - } - } - } - } - if (!conv_params_.y_kernel_is_1) - { - c += " for (int ky = 0; ky < args.kernel_size_y; ++ky) {\n"; - for (int y = 0; y < block_size.y; ++y) - { - const std::string yck = "yck" + std::to_string(y); - c += " int " + yck + " = ky * args.dilation_y + yc" + std::to_string(y) + ";\n"; - 
if (!src_def.SupportsZeroClamp(Axis::HEIGHT)) - { - c += " bool in_y" + std::to_string(y) + " = " + yck + " >= 0 && " + yck + - " < args.src_tensor.Height();\n"; - if (!src_def.CanReadOutOfBorder(Axis::HEIGHT)) - { - c += " " + yck + " = clamp(" + yck + ", 0, args.src_tensor.Height() - 1);\n"; - } - } - } - } - if (!conv_params_.x_kernel_is_1) - { - c += " for (int kx = 0; kx < args.kernel_size_x; ++kx) {\n"; - for (int x = 0; x < block_size.x; ++x) - { - const std::string xck = "xck" + std::to_string(x); - c += " int xck" + std::to_string(x) + " = kx * args.dilation_x + xc" + std::to_string(x) + - ";\n"; - if (!src_def.SupportsZeroClamp(Axis::WIDTH)) - { - c += " bool in_x" + std::to_string(x) + " = " + xck + " >= 0 && " + xck + - " < args.src_tensor.Width();\n"; - if (!src_def.CanReadOutOfBorder(Axis::WIDTH)) - { - c += " " + xck + " = clamp(" + xck + ", 0, args.src_tensor.Width() - 1);\n"; - } - } - } - } - const bool need_multiple_slice_strides = - src_def.ReturnsZeroForNegOneRead() && !trivial_kernel_size; - for (int z = 0; z < block_size.z; ++z) - { - const std::string zind = std::to_string(z); - for (int y = 0; y < block_size.y; ++y) - { - const std::string yind = std::to_string(y); - for (int x = 0; x < block_size.x; ++x) - { - const std::string xind = std::to_string(x); - std::string xc = conv_params.x_kernel_is_1 ? "xc" + xind : "xck" + xind; - std::string yc = conv_params.y_kernel_is_1 ? "yc" + yind : "yck" + yind; - const std::string id = generate_id(xind, yind, zind); - std::string coords = "" + xc + ", " + yc; - if (src_def.HasAxis(Axis::DEPTH)) - { - std::string zc = conv_params.z_kernel_is_1 ? "zc" + zind : "zck" + zind; - coords += ", " + zc; - } - if (src_def.IsLinear()) - { - c += " args.src_tensor.GetAddress(addr" + id + ", " + coords + ", 0);\n"; - if (need_multiple_slice_strides) - { - const std::string check = generate_check(xind, yind, zind); - c += " addr" + id + " = select(-1, addr" + id + ", (" + check + "));\n"; - c += - " int ds" + id + " = select(0, args.src_tensor.SliceStride(), (" + check + "));\n"; - } - } - } - } - } - if (src_def.IsLinear() && !need_multiple_slice_strides) - { - c += " int ds = args.src_tensor.SliceStride();\n"; - } - - auto declare_src = [&]() { - for (int z = 0; z < block_size.z; ++z) - { - const std::string zind = std::to_string(z); - for (int y = 0; y < block_size.y; ++y) - { - const std::string yind = std::to_string(y); - for (int x = 0; x < block_size.x; ++x) - { - const std::string xind = std::to_string(x); - const std::string id = generate_id(xind, yind, zind); - c += " " + weights_data_type + " src" + id + ";\n"; - } - } - } - }; - const bool conditional_read = device_info.IsMali(); - auto read_src = [&]() { - const std::string cl_type = ToCLDataType(conv_params.weights_data_type); - for (int z = 0; z < block_size.z; ++z) - { - const std::string zind = std::to_string(z); - for (int y = 0; y < block_size.y; ++y) - { - const std::string yind = std::to_string(y); - for (int x = 0; x < block_size.x; ++x) - { - const std::string xind = std::to_string(x); - std::string id = generate_id(xind, yind, zind); - const std::string check = generate_check(xind, yind, zind); - std::string address; - if (src_def.IsLinear()) - { - address = "addr" + id; - } - else - { - std::string xc = conv_params.x_kernel_is_1 ? "xc" + xind : "xck" + xind; - std::string yc = conv_params.y_kernel_is_1 ? "yc" + yind : "yck" + yind; - address = "" + xc + ", " + yc; - if (src_def.HasAxis(Axis::DEPTH)) - { - std::string zc = conv_params.z_kernel_is_1 ? 
"zc" + zind : "zck" + zind; - address += ", " + zc; - } - address += ", s"; - } - if (src_def.ReturnsZeroForNegOneRead()) - { - c += " src" + id + " = args.src_tensor.Read<" + cl_type + ">(" + address + ");\n"; - const std::string ds = trivial_kernel_size ? "ds" : "ds" + id; - c += " " + address + " += " + ds + ";\n"; - } - else - { - if (!check.empty()) - { - if (conditional_read) - { - c += " src" + id + " = " + check + " ? args.src_tensor.Read<" + cl_type + ">(" + - address + ") : (FLT4)(0.0f);\n"; - } - else - { - c += " src" + id + " = args.src_tensor.Read<" + cl_type + ">(" + address + - ") * (FLT)(" + check + ");\n"; - } - } - else - { - c += " src" + id + " = args.src_tensor.Read<" + cl_type + ">(" + address + ");\n"; - } - if (src_def.IsLinear()) - { - c += " " + address + " += ds;\n"; - } - } - } - } - } - }; - const bool weights_type_as_accum_type = !(op_def.precision == CalculationsPrecision::F32_F16 && - conv_params.weights_data_type == DataType::FLOAT16); - auto conv_core = [&](int shared_offset) { - const std::string channels[] = {"x", "y", "z", "w"}; - for (int s = 0; s < block_size.w; ++s) - { - const std::string sind = std::to_string(s); - if (weights_type_as_accum_type) - { - for (int ch = 0; ch < 4; ++ch) - { - for (int z = 0; z < block_size.z; ++z) - { - const std::string zind = std::to_string(z); - for (int y = 0; y < block_size.y; ++y) - { - const std::string yind = std::to_string(y); - for (int x = 0; x < block_size.x; ++x) - { - const std::string xind = std::to_string(x); - std::string R = "r" + generate_id_full(xind, yind, zind, sind); - std::string S = "src" + generate_id(xind, yind, zind); - if (use_simd_broadcast) - { - int simd_id = (s * 4 + ch + shared_offset) / simd_size; - int thread_id = (s * 4 + ch + shared_offset) % simd_size; - std::string w_val_x = "sub_group_broadcast(simd_w" + std::to_string(simd_id) + - ".x, " + std::to_string(thread_id) + "u)"; - std::string w_val_y = "sub_group_broadcast(simd_w" + std::to_string(simd_id) + - ".y, " + std::to_string(thread_id) + "u)"; - std::string w_val_z = "sub_group_broadcast(simd_w" + std::to_string(simd_id) + - ".z, " + std::to_string(thread_id) + "u)"; - std::string w_val_w = "sub_group_broadcast(simd_w" + std::to_string(simd_id) + - ".w, " + std::to_string(thread_id) + "u)"; - c += " " + R + ".x += " + w_val_x + " * " + S + "." + channels[ch] + ";\n"; - c += " " + R + ".y += " + w_val_y + " * " + S + "." + channels[ch] + ";\n"; - c += " " + R + ".z += " + w_val_z + " * " + S + "." + channels[ch] + ";\n"; - c += " " + R + ".w += " + w_val_w + " * " + S + "." + channels[ch] + ";\n"; - } - else - { - const std::string weight_id = std::to_string(s * 4 + ch + shared_offset); - std::string w_val; - if (conv_params.AreWeightsBuffer()) - { - w_val = "weights_cache[" + weight_id + "]"; - } - else - { - w_val = "f" + weight_id; - } - c += " " + R + " += " + w_val + " * " + S + "." 
+ channels[ch] + ";\n"; - } - } - } - } - } - } - else - { // F32_F16 precision and weights type is float16 - for (int z = 0; z < block_size.z; ++z) - { - const std::string zind = std::to_string(z); - for (int y = 0; y < block_size.y; ++y) - { - const std::string yind = std::to_string(y); - for (int x = 0; x < block_size.x; ++x) - { - const std::string xind = std::to_string(x); - std::string R = "r" + generate_id_full(xind, yind, zind, sind); - std::string S = "src" + generate_id(xind, yind, zind); - std::vector<std::string> F(4); - for (int i = 0; i < 4; ++i) - { - std::string weight_id = std::to_string(s * 4 + i + shared_offset); - if (conv_params.AreWeightsBuffer()) - { - F[i] = "weights_cache[" + weight_id + "]"; - } - else - { - F[i] = "f" + weight_id; - } - } - c += " " + R + " += convert_float4(" + S + ".x * " + F[0] + " + " + S + ".y * " + - F[1] + " + " + S + ".z * " + F[2] + " + " + S + ".w * " + F[3] + ");\n"; - } - } - } - } - } - }; - - c += " int s = 0;\n"; - c += " do {\n"; - declare_src(); - const int total_work_items = work_group_size_.x * work_group_size_.y * work_group_size_.z; - if (conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP) - { - c += GenerateAsyncUpload("weights_cache", "filters_loc", - /*global_offset_name*/ "", local_mem_size); - } - else if (conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_BY_THREADS) - { - c += " barrier(CLK_LOCAL_MEM_FENCE);\n"; - c += - GenerateUploadByThreads("weights_cache", "filters_loc", - /*global_offset_name*/ "", "lid", total_work_items, local_mem_size); - } - else if (use_simd_broadcast) - { - int parts = local_mem_size / simd_size; - int reminder = local_mem_size % simd_size; - for (int i = 0; i < parts; ++i) - { - c += " FLT4 simd_w" + std::to_string(i) + " = filters_loc[simd_id + " + - std::to_string(i * simd_size) + "];\n"; - } - if (reminder) - { - c += " FLT4 simd_w" + std::to_string(parts) + ";\n"; - c += " if (simd_id < " + std::to_string(reminder) + ") {\n"; - c += " simd_w" + std::to_string(parts) + " = filters_loc[simd_id + " + - std::to_string(parts * simd_size) + "];\n"; - c += " }\n"; - } - } - else if (conv_params.AreWeightsBuffer()) - { // GLOBAL_MEM/CONSTANT_MEM - c += " weights_cache = filters_loc;\n"; - } - else - { // TEXTURES_MEM - for (int dst_s = 0; dst_s < block_size.w; ++dst_s) - { - std::string f_y = trivial_kernel_size ? 
"s" : "filter_offset"; - if (conv_params.different_weights_for_height) - { - f_y = "DST_Y * args.src_tensor.Slices() + s"; - } - c += absl::Substitute( - R"( FLT4 f$2 = args.weights0.Read(DST_S + $0, $1); - FLT4 f$3 = args.weights1.Read(DST_S + $0, $1); - FLT4 f$4 = args.weights2.Read(DST_S + $0, $1); - FLT4 f$5 = args.weights3.Read(DST_S + $0, $1); -)", - dst_s, f_y, dst_s * 4 + 0, dst_s * 4 + 1, dst_s * 4 + 2, dst_s * 4 + 3); - } - if (!trivial_kernel_size) - { - c += " filter_offset++;\n"; - } - } - read_src(); - c += " s += 1;\n"; - if (conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_BY_THREADS) - { - c += " barrier(CLK_LOCAL_MEM_FENCE);\n"; - } - conv_core(0); - for (int i = 1; i < conv_params.src_depth_loop_size; ++i) - { - read_src(); - conv_core(i * block_size.w * 4); - c += " s += 1;\n"; - } - if (conv_params.AreWeightsBuffer()) - { - c += " filters_loc += " + std::to_string(local_mem_size) + ";\n"; - } - c += " } while (s < args.src_tensor.Slices());\n"; - if (!conv_params.x_kernel_is_1) - { - c += " };\n"; - } - if (!conv_params.y_kernel_is_1) - { - c += " };\n"; - } - if (src_def.HasAxis(Axis::DEPTH) && !conv_params_.z_kernel_is_1) - { - c += " };\n"; - } - if (conv_params.AreWeightsBuffer()) - { - if (conv_params.weights_upload_type == ConvPowerVR::WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP) - { - c += GenerateAsyncUpload("weights_cache", "args.biases.GetPtr()", "DST_S", block_size.w); - } - else if (conv_params.weights_upload_type == - ConvPowerVR::WeightsUploadType::LOCAL_MEM_BY_THREADS) - { - c += " barrier(CLK_LOCAL_MEM_FENCE);\n"; - c += GenerateUploadByThreads("weights_cache", "args.biases.GetPtr()", "DST_S", "lid", - total_work_items, block_size.w); - c += " barrier(CLK_LOCAL_MEM_FENCE);\n"; - } - else - { - c += " weights_cache = args.biases.GetPtr() + DST_S;\n"; - } - } - if (late_oob_check) - { - c += " if (" + dst_oob_check + ") {\n"; - c += " return;\n"; - c += " }\n"; - } - - auto generate_dst_check = [&](int x, int y, int z) { - std::string check; - const std::vector<Axis> axes{Axis::WIDTH, Axis::HEIGHT, Axis::DEPTH}; - const std::vector<std::string> names{"Width()", "Height()", "Depth()"}; - std::vector<std::string> coords(3); - coords[0] = "DST_X + " + std::to_string(x); - coords[1] = "DST_Y + " + std::to_string(y); - coords[2] = "DST_Z + " + std::to_string(z); - const std::vector<int> ids{x, y, z}; - for (size_t i = 0; i < axes.size(); ++i) - { - const auto &axis = axes[i]; - if (src_def.HasAxis(axis) && ids[i] != 0) - { - if (!check.empty()) - { - check += " && "; - } - check += coords[i] + " < args.dst_tensor." 
+ names[i]; - } - } - return check; - }; - - for (int s = 0; s < block_size.w; ++s) - { - const std::string sind = std::to_string(s); - c += " if (DST_S + " + sind + " >= args.dst_tensor.Slices()) return;\n"; - c += " {\n"; - if (conv_params.AreWeightsBuffer()) - { - c += " FLT4 bias_val = TO_FLT4(weights_cache[" + sind + "]);\n"; - } - else - { - c += " FLT4 bias_val = args.biases.Read(DST_S + " + sind + ");\n"; - } - for (int z = 0; z < block_size.z; ++z) - { - const std::string zind = std::to_string(z); - for (int y = 0; y < block_size.y; ++y) - { - const std::string yind = std::to_string(y); - for (int x = 0; x < block_size.x; ++x) - { - const std::string xind = std::to_string(x); - const std::string id = generate_id_full(xind, yind, zind, sind); - const std::string check = generate_dst_check(x, y, z); - std::string coords = "DST_X + " + xind + ", DST_Y + " + yind; - if (src_def.HasAxis(Axis::DEPTH)) - { - coords += ", DST_Z + " + zind; - } - coords += ", DST_S + " + sind; - if (!check.empty()) - { - c += " if (" + check + ") {\n"; - } - else - { - c += " {\n"; - } - c += " FLT4 res = TO_FLT4(r" + id + ") + bias_val;\n"; - c += " args.dst_tensor.Write(res, " + coords + ");\n"; - c += " }\n"; - } - } - } - c += " }\n"; - } - c += "}\n"; - return c; -} - -ConvPowerVR::ConvParams -ConvPowerVR::GuessBestParams(const DeviceInfo &device_info, const OperationDef &definition, - int src_depth, int dst_depth, bool x_kernel_is_1, bool y_kernel_is_1, - bool different_weights_for_height, const BHWC *dst_shape) -{ - ConvParams conv_params; - conv_params.linear_spatial = false; - conv_params.weights_data_type = DeduceDataTypeFromPrecision(definition.precision); - conv_params.x_kernel_is_1 = x_kernel_is_1; - conv_params.y_kernel_is_1 = y_kernel_is_1; - conv_params.different_weights_for_height = different_weights_for_height; - if (device_info.IsNvidia()) - { - if (different_weights_for_height) - { - work_group_size_ = int3(32, 1, 1); - work_group_launch_order_ = int3(2, 0, 1); - conv_params.fixed_work_group_size = true; - } - else - { - conv_params.linear_spatial = true; - work_group_size_ = int3(32, 1, 1); - work_group_launch_order_ = int3(1, 0, 2); - conv_params.fixed_work_group_size = true; - } - conv_params.block_size = int4(2, 1, 1, 4); - conv_params.src_depth_loop_size = 1; - conv_params.weights_upload_type = WeightsUploadType::LOCAL_MEM_BY_THREADS; - if (dst_depth % 4 == 0 || dst_depth >= 8) - { - conv_params.block_size.w = 4; - } - else if (dst_depth % 2 == 0 || dst_depth >= 4) - { - conv_params.block_size.w = 2; - } - else - { - conv_params.block_size.w = dst_depth; - } - if (dst_shape) - { - int task_size = dst_shape->w * dst_shape->b * dst_shape->h * dst_depth; - float task_size_per_cu = static_cast<float>(task_size) / device_info.compute_units_count; - int block_size = - conv_params.block_size.x * conv_params.block_size.y * conv_params.block_size.w; - float threads_per_cu = task_size_per_cu / block_size; - float warps_per_cu = threads_per_cu / 32 /*warp_size*/; - if (warps_per_cu < 8.0f) - { - conv_params.block_size.x = 1; - } - if (warps_per_cu < 4.0f && conv_params.block_size.w >= 4) - { - conv_params.block_size.w /= 2; - } - if (warps_per_cu < 2.0f && conv_params.block_size.w >= 2) - { - conv_params.block_size.w /= 2; - } - } - if (src_depth % 2 == 0) - { - conv_params.src_depth_loop_size = 2; - } - if (src_depth % 4 == 0 && conv_params.block_size.w <= 2) - { - conv_params.src_depth_loop_size = 4; - } - } - else if (device_info.IsPowerVR()) - { - if (different_weights_for_height) - { - 
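- // Mirrors the Nvidia branch above: with per-row weights the spatial grid
- // stays non-linear, but the fixed 32x1x1 work-group is kept.
- 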
work_group_size_ = int3(32, 1, 1); - work_group_launch_order_ = int3(2, 0, 1); - conv_params.fixed_work_group_size = true; - } - else - { - conv_params.linear_spatial = true; - work_group_size_ = int3(32, 1, 1); - work_group_launch_order_ = int3(1, 0, 2); - conv_params.fixed_work_group_size = true; - } - conv_params.weights_data_type = - definition.precision == CalculationsPrecision::F16 ? DataType::FLOAT16 : DataType::FLOAT32; - conv_params.block_size = int4(1, 1, 1, 4); - conv_params.src_depth_loop_size = 1; - conv_params.weights_upload_type = WeightsUploadType::LOCAL_MEM_ASYNC_SUBGROUP; - if (dst_depth % 8 == 0 || dst_depth >= 32) - { - conv_params.block_size.w = 8; - } - else if (dst_depth % 4 == 0 || dst_depth >= 8) - { - conv_params.block_size.w = 4; - } - else if (dst_depth % 2 == 0 || dst_depth >= 4) - { - conv_params.block_size.w = 2; - } - else - { - conv_params.block_size.w = dst_depth; - } - if (definition.precision == CalculationsPrecision::F16) - { - conv_params.block_size.w = std::min(4, conv_params.block_size.w); - if (src_depth % 2 == 0) - { - conv_params.src_depth_loop_size = 2; - } - if (src_depth % 4 == 0 && conv_params.block_size.w <= 2) - { - conv_params.src_depth_loop_size = 4; - } - if (conv_params.block_size.w == 1) - { - if (src_depth % 2 == 0) - { - conv_params.src_depth_loop_size = 2; - } - if (src_depth % 4 == 0) - { - conv_params.src_depth_loop_size = 4; - } - if (src_depth <= 8) - { - conv_params.src_depth_loop_size = src_depth; - } - } - conv_params.block_size.x = 2; - } - } - else if (device_info.IsAMD()) - { - if (different_weights_for_height) - { - work_group_size_ = int3(32, 1, 1); - work_group_launch_order_ = int3(2, 0, 1); - conv_params.fixed_work_group_size = true; - } - else - { - work_group_size_ = int3(8, 4, 1); - work_group_launch_order_ = int3(2, 0, 1); - conv_params.fixed_work_group_size = true; - } - - conv_params.block_size = int4(2, 1, 1, 1); - if (x_kernel_is_1 && y_kernel_is_1) - { - conv_params.block_size.y = 2; - } - conv_params.src_depth_loop_size = 1; - conv_params.weights_upload_type = WeightsUploadType::CONSTANT_MEM; - if (dst_depth % 8 == 0 || dst_depth >= 32) - { - conv_params.block_size.w = 8; - } - else if (dst_depth % 4 == 0 || dst_depth >= 8) - { - conv_params.block_size.w = 4; - } - else if (dst_depth % 2 == 0 || dst_depth >= 4) - { - conv_params.block_size.w = 2; - } - else - { - conv_params.block_size.w = 1; - } - if (src_depth % 2 == 0 && src_depth >= 16) - { - conv_params.src_depth_loop_size = 2; - } - } - else if (device_info.IsMali()) - { - int block_size = 2; - if (dst_shape) - { - int task_size = dst_shape->w * dst_shape->b * dst_shape->h * dst_depth; - block_size = GetRecommendedBlockSizeForConv(device_info, definition.precision, task_size); - } - if (!x_kernel_is_1 || !y_kernel_is_1) - { - block_size = std::min(block_size, 4); - } - if (block_size == 8) - { - if (dst_depth == 1 || dst_depth == 3) - { - conv_params.block_size = int4(2, 2, 1, 1); - } - else - { - conv_params.block_size = int4(2, 2, 1, 2); - } - } - else if (block_size == 4) - { - if (dst_depth == 1 || dst_depth == 3) - { - conv_params.block_size = int4(2, 2, 1, 1); - } - else - { - conv_params.block_size = int4(2, 1, 1, 2); - } - } - else if (block_size == 2) - { - conv_params.block_size = int4(2, 1, 1, 1); - } - else - { - conv_params.block_size = int4(1, 1, 1, 1); - } - conv_params.src_depth_loop_size = 1; - MaliInfo mali_info = device_info.mali_info; - if (src_depth % 2 == 0 && block_size <= 2 && !mali_info.IsMidgard()) - { - 
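- // Midgard-era Malis skip this unrolling; newer Malis read two source
- // slices per loop iteration here, or four below when block_size is 1 and
- // the precision is F16.
- 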
conv_params.src_depth_loop_size = 2; - } - if (src_depth % 4 == 0 && block_size == 1 && !mali_info.IsMidgard() && - definition.precision == CalculationsPrecision::F16) - { - conv_params.src_depth_loop_size = 4; - } - work_group_size_ = int3(4, 4, 1); - work_group_launch_order_ = int3(0, 1, 2); - conv_params.fixed_work_group_size = false; - conv_params.weights_upload_type = WeightsUploadType::GLOBAL_MEM; - } - else if (device_info.IsAdreno()) - { - conv_params.block_size = int4(2, 2, 1, 2); - if (device_info.IsAdreno3xx()) - { - if (definition.precision == CalculationsPrecision::F16) - { - conv_params.block_size = int4(2, 2, 1, 2); - } - else if (definition.precision == CalculationsPrecision::F32_F16) - { - conv_params.block_size = int4(2, 1, 1, 2); - } - else - { // F32 - conv_params.block_size = int4(2, 2, 1, 1); - } - } - work_group_size_ = int3(8, 2, 1); - work_group_launch_order_ = int3(0, 1, 2); - conv_params.fixed_work_group_size = false; - conv_params.src_depth_loop_size = 1; - if (definition.src_tensors.size() == 2) - { - // dynamic weights supported only with buffers. - conv_params.weights_upload_type = WeightsUploadType::GLOBAL_MEM; - } - else - { - conv_params.weights_upload_type = WeightsUploadType::TEXTURES_MEM_X4; - } - } - else if (device_info.IsIntel()) - { - if (different_weights_for_height) - { - work_group_size_ = int3(16, 1, 1); - work_group_launch_order_ = int3(0, 1, 2); - conv_params.fixed_work_group_size = true; - } - else - { - conv_params.linear_spatial = true; - work_group_size_ = int3(16, 1, 1); - work_group_launch_order_ = int3(0, 1, 2); - conv_params.fixed_work_group_size = true; - } - conv_params.block_size = int4(1, 1, 1, 4); - conv_params.src_depth_loop_size = 1; - int sub_group_size = 16; - const bool supports_subgroups = device_info.SupportsExtension("cl_khr_subgroups") || - device_info.SupportsExtension("cl_intel_subgroups"); - if (definition.precision != CalculationsPrecision::F32_F16 && supports_subgroups && - device_info.SupportsExtension("cl_intel_required_subgroup_size") && - device_info.SupportsSubGroupWithSize(sub_group_size)) - { - conv_params.weights_upload_type = WeightsUploadType::PRIVATE_MEM_SIMD_BROADCAST; - conv_params.simd_size = sub_group_size; - } - else - { - conv_params.weights_upload_type = WeightsUploadType::LOCAL_MEM_BY_THREADS; - } - if (dst_depth % 4 == 0 || dst_depth >= 8) - { - conv_params.block_size.w = 4; - } - else if (dst_depth % 2 == 0 || dst_depth >= 4) - { - conv_params.block_size.w = 2; - } - else - { - conv_params.block_size.w = dst_depth; - } - if (src_depth % 2 == 0) - { - conv_params.src_depth_loop_size = 2; - } - if (src_depth % 4 == 0 && conv_params.block_size.w <= 2) - { - conv_params.src_depth_loop_size = 4; - } - } - else - { - conv_params.block_size = int4(1, 1, 1, 4); - work_group_size_ = int3(8, 2, 1); - work_group_launch_order_ = int3(0, 1, 2); - conv_params.fixed_work_group_size = false; - conv_params.src_depth_loop_size = 1; - conv_params.weights_upload_type = WeightsUploadType::GLOBAL_MEM; - if (dst_depth % 4 == 0 || dst_depth >= 8) - { - conv_params.block_size.w = 4; - } - else if (dst_depth % 2 == 0 || dst_depth >= 4) - { - conv_params.block_size.w = 2; - } - else - { - conv_params.block_size.w = dst_depth; - } - if (src_depth % 2 == 0) - { - conv_params.src_depth_loop_size = 2; - } - if (src_depth % 4 == 0 && conv_params.block_size.w <= 2) - { - conv_params.src_depth_loop_size = 4; - } - } - - return conv_params; -} - -ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(const DeviceInfo &device_info, - 
const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC *dst_shape) -{ - const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); - const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); - const bool x_kernel_is_1 = attr.weights.shape.w == 1 && attr.strides.w == 1 && - attr.dilations.w == 1 && attr.padding.prepended.w == 0 && - attr.padding.appended.w == 0; - const bool y_kernel_is_1 = attr.weights.shape.h == 1 && attr.strides.h == 1 && - attr.dilations.h == 1 && attr.padding.prepended.h == 0 && - attr.padding.appended.h == 0; - return GuessBestParams(device_info, definition, src_depth, dst_depth, x_kernel_is_1, - y_kernel_is_1, false, dst_shape); -} - -ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution3DAttributes &attr, - const BHWDC *dst_shape) -{ - const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); - const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); - const bool x_kernel_is_1 = attr.weights.shape.w == 1 && attr.strides.w == 1 && - attr.dilations.w == 1 && attr.padding.prepended.w == 0 && - attr.padding.appended.w == 0; - const bool y_kernel_is_1 = attr.weights.shape.h == 1 && attr.strides.h == 1 && - attr.dilations.h == 1 && attr.padding.prepended.h == 0 && - attr.padding.appended.h == 0; - const bool z_kernel_is_1 = attr.weights.shape.d == 1 && attr.strides.d == 1 && - attr.dilations.d == 1 && attr.padding.prepended.d == 0 && - attr.padding.appended.d == 0; - - ConvPowerVR::ConvParams result; - BHWC shape; - if (dst_shape) - { - shape.b = dst_shape->b; - shape.h = dst_shape->h * dst_shape->d; - shape.w = dst_shape->w; - shape.c = dst_shape->c; - result = GuessBestParams(device_info, definition, src_depth, dst_depth, x_kernel_is_1, - y_kernel_is_1, false, &shape); - } - else - { - result = GuessBestParams(device_info, definition, src_depth, dst_depth, x_kernel_is_1, - y_kernel_is_1, false, nullptr); - } - result.z_kernel_is_1 = z_kernel_is_1; - return result; -} - -ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC &weights_shape, - const BHWC *dst_shape) -{ - const int dst_depth = DivideRoundUp(weights_shape.b, 4); - const int src_depth = DivideRoundUp(weights_shape.c, 4); - const bool x_kernel_is_1 = weights_shape.w == 1 && attr.strides.w == 1 && attr.dilations.w == 1 && - attr.padding.prepended.w == 0 && attr.padding.appended.w == 0; - const bool y_kernel_is_1 = weights_shape.h == 1 && attr.strides.h == 1 && attr.dilations.h == 1 && - attr.padding.prepended.h == 0 && attr.padding.appended.h == 0; - return GuessBestParams(device_info, definition, src_depth, dst_depth, x_kernel_is_1, - y_kernel_is_1, false, dst_shape); -} - -ConvPowerVR::ConvParams ConvPowerVR::GuessBestParams(const DeviceInfo &device_info, - const OperationDef &definition, - const FullyConnectedAttributes &attr, - const BHWC *dst_shape) -{ - const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); - const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); - ConvPowerVR::ConvParams params = - GuessBestParams(device_info, definition, src_depth, dst_depth, true, true, false, dst_shape); - work_group_size_.x *= work_group_size_.y; - work_group_size_.y = 1; - params.block_size.x *= params.block_size.y; - params.block_size.y = 1; - return params; -} - -ConvPowerVR::ConvParams ConvPowerVR::GuessBestParamsWinograd(const DeviceInfo &device_info, - 
const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC *dst_shape) -{ - const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); - const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); - ConvPowerVR::ConvParams params = - GuessBestParams(device_info, definition, src_depth, dst_depth, true, true, true, dst_shape); - params.block_size.x *= params.block_size.y; - params.block_size.y = 1; - return params; -} - -ConvPowerVR CreateConvPowerVR(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution2DAttributes &attr, const BHWC *dst_shape) -{ - ConvPowerVR result(definition, attr, device_info, dst_shape); - result.GenerateCode(device_info); - result.UploadData(attr.weights, attr.bias); - return result; -} - -ConvPowerVR CreateConvPowerVR(const DeviceInfo &device_info, const OperationDef &definition, - const FullyConnectedAttributes &attr, const BHWC *dst_shape) -{ - ConvPowerVR result(definition, attr, device_info, dst_shape); - result.GenerateCode(device_info); - result.UploadData(attr.weights, attr.bias); - return result; -} - -ConvPowerVR CreateConvPowerVRDynamicWeights(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC &weights_shape, const BHWC *dst_shape) -{ - ConvPowerVR result(definition, attr, weights_shape, device_info, dst_shape); - result.GenerateCode(device_info); - result.UploadBias(attr.bias); - return result; -} - -ConvPowerVR CreateConvPowerVRWino4x4To6x6(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC *dst_shape) -{ - ConvPowerVR result(definition); - result.conv_params_ = result.GuessBestParamsWinograd(device_info, definition, attr, dst_shape); - result.GenerateCode(device_info); - result.UploadDataForWinograd4x4To6x6(attr.weights); - return result; -} - -ConvPowerVR CreateConvPowerVR3D(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution3DAttributes &attr, const BHWDC *dst_shape) -{ - ConvPowerVR result(definition, attr, device_info, dst_shape); - result.GenerateCode(device_info); - result.UploadWeights(attr.weights); - result.UploadBias(attr.bias); - return result; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvPowervr.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvPowervr.h deleted file mode 100644 index f83f05730..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvPowervr.h +++ /dev/null @@ -1,413 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_POWERVR_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_POWERVR_H__ - -#include <cstring> -#include <vector> - -#include "open_cl/Buffer.h" -#include "open_cl/ClDevice.h" -#include "open_cl/kernels/ConvCommon.h" -#include "open_cl/kernels/GpuOperation.h" -#include "open_cl/kernels/Util.h" -#include "open_cl/LinearStorage.h" -#include "open_cl/Tensor.h" -#include "open_cl/Texture2d.h" -#include "open_cl/Util.h" -#include "open_cl/DataType.h" -#include "open_cl/Operations.h" -#include "open_cl/Shape.h" -#include "open_cl/Status.h" -#include "open_cl/Tensor.h" -#include "open_cl/Types.h" -#include "open_cl/WinogradUtil.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -class ConvPowerVR : public GPUOperation -{ -public: - ConvPowerVR() = default; - void GetPossibleKernelWorkGroups(TuningType tuning_type, const DeviceInfo &device_info, - const KernelInfo &kernel_info, - std::vector<int3> *work_groups) const override; - absl::Status BindArguments(ArgumentsBinder *args) override; - int3 GetGridSize() const override; - - ConvWeightsDescription GetConvWeightsDescription() const - { - ConvWeightsDescription desc; - desc.layout = ConvWeightsLayout::kOHWIOGroupI4O4; - desc.output_group_size = conv_params_.block_size.w; - return desc; - } - - // Move only - ConvPowerVR(ConvPowerVR &&operation); - ConvPowerVR &operator=(ConvPowerVR &&operation); - ConvPowerVR(const ConvPowerVR &) = delete; - ConvPowerVR &operator=(const ConvPowerVR &) = delete; - -private: - enum class WeightsUploadType - { - LOCAL_MEM_ASYNC_SUBGROUP, // we use it for PowerVR with workgroup size = 32 - LOCAL_MEM_BY_THREADS, - GLOBAL_MEM, - CONSTANT_MEM, - PRIVATE_MEM_SIMD_BROADCAST, - TEXTURES_MEM_X4, // 4 textures for weights - }; - - struct ConvParams - { - // Usually we use this combinations for CalculationPrecision: - // F32: all F32 - // F16: all F16 - // F32_F16: all besides accumulator is F16, including weights - // But for PowerVR we can achieve better performance in F32_F16 with F32 - // weights, so for PowerVR in this kernel we have F32 weights for - // F32_F16 precision mode - DataType weights_data_type; // used for weights and biases - int4 block_size; // WHDS - bool fixed_work_group_size; - bool linear_spatial; // spatial dimensions are Width/Height/Depth - bool different_weights_for_height; - int src_depth_loop_size; - WeightsUploadType weights_upload_type; - bool x_kernel_is_1; - bool y_kernel_is_1; - bool z_kernel_is_1; - - // used only with PRIVATE_MEM_SIMD_BROADCAST - int simd_size = 1; - - bool AreWeightsBuffer() const - { - return weights_upload_type != WeightsUploadType::TEXTURES_MEM_X4; - } - - bool IsPrivateMemBroadcast() const - { - return weights_upload_type == WeightsUploadType::PRIVATE_MEM_SIMD_BROADCAST; - } - }; - - ConvPowerVR(const OperationDef &definition, const Convolution2DAttributes &attr, - const DeviceInfo &device_info, const BHWC *dst_shape = nullptr); - ConvPowerVR(const OperationDef &definition, const Convolution2DAttributes &attr, - const BHWC &weights_shape, const DeviceInfo &device_info, - const BHWC *dst_shape = nullptr); - ConvPowerVR(const OperationDef &definition, const FullyConnectedAttributes &attr, - const DeviceInfo &device_info, const BHWC *dst_shape = nullptr); - explicit ConvPowerVR(const OperationDef &definition); - ConvPowerVR(const OperationDef &definition, const Convolution3DAttributes &attr, - const DeviceInfo &device_info, const BHWDC *dst_shape = nullptr); - - void GenerateCode(const 
DeviceInfo &device_info); - - template <DataType T> - void UploadData(const InternalTensor<OHWI, T> &weights, const InternalTensor<Linear, T> &biases); - template <DataType T> void UploadDataForWinograd4x4To6x6(const InternalTensor<OHWI, T> &weights); - - template <DataType T> void UploadWeights(const InternalTensor<OHWI, T> &weights); - - template <DataType T> void UploadWeights(const InternalTensor<OHWDI, T> &weights); - - template <DataType T> void UploadBias(const InternalTensor<Linear, T> &bias); - - friend ConvPowerVR CreateConvPowerVR(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, const BHWC *dst_shape); - - friend ConvPowerVR CreateConvPowerVR(const DeviceInfo &device_info, - const OperationDef &definition, - const FullyConnectedAttributes &attr, const BHWC *dst_shape); - - friend ConvPowerVR CreateConvPowerVRDynamicWeights(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC &weights_shape, - const BHWC *dst_shape); - - friend ConvPowerVR CreateConvPowerVRWino4x4To6x6(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC *dst_shape); - - friend ConvPowerVR CreateConvPowerVR3D(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution3DAttributes &attr, - const BHWDC *dst_shape); - - ConvParams GuessBestParams(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution2DAttributes &attr, const BHWC *dst_shape = nullptr); - ConvParams GuessBestParams(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution2DAttributes &attr, const BHWC &weights_shape, - const BHWC *dst_shape = nullptr); - ConvParams GuessBestParams(const DeviceInfo &device_info, const OperationDef &definition, - const FullyConnectedAttributes &attr, const BHWC *dst_shape = nullptr); - ConvParams GuessBestParamsWinograd(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC *dst_shape = nullptr); - ConvParams GuessBestParams(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution3DAttributes &attr, const BHWDC *dst_shape = nullptr); - ConvParams GuessBestParams(const DeviceInfo &device_info, const OperationDef &definition, - int src_depth, int dst_depth, bool x_kernel_is_1, bool y_kernel_is_1, - bool different_weights_for_height, const BHWC *dst_shape = nullptr); - - std::string GenerateConv(const DeviceInfo &device_info, const OperationDef &op_def, - bool stride_correction, const ConvParams &conv_params); - - int4 stride_; - int4 padding_; - int4 kernel_size_; - int4 dilation_; - ConvParams conv_params_; -}; - -template <DataType T> -void ConvPowerVR::UploadData(const InternalTensor<OHWI, T> &weights, - const InternalTensor<Linear, T> &biases) -{ - UploadWeights(weights); - UploadBias(biases); -} - -template <DataType T> -void ConvPowerVR::UploadDataForWinograd4x4To6x6(const InternalTensor<OHWI, T> &weights) -{ - InternalTensor<OHWI, T> wino_weights; - RearrangeWeightsToWinograd4x4To6x6Weights(weights, &wino_weights); - UploadWeights(wino_weights); - InternalTensor<Linear, DataType::FLOAT32> biases; - biases.shape = Linear(weights.shape.o); - biases.data.resize(weights.shape.o, 0.0f); - UploadBias(biases); -} - -template <DataType T> void ConvPowerVR::UploadBias(const InternalTensor<Linear, T> &bias) -{ - BufferDescriptor desc; - desc.element_type = 
conv_params_.weights_data_type; - desc.element_size = 4; - desc.memory_type = - conv_params_.weights_upload_type == ConvPowerVR::WeightsUploadType::CONSTANT_MEM - ? MemoryType::CONSTANT - : MemoryType::GLOBAL; - const int float_size = sizeof(float); - // TODO - // conv_params_.weights_data_type == DataType::FLOAT32 ? sizeof(float) : sizeof(half); - int aligned_channels = AlignByN(bias.shape.v, 4 * conv_params_.block_size.w); - desc.size = float_size * aligned_channels; - desc.data.resize(desc.size); - if (conv_params_.weights_data_type == DataType::FLOAT32) - { - float *gpu_data = reinterpret_cast<float *>(desc.data.data()); - for (int i = 0; i < aligned_channels; ++i) - { - gpu_data[i] = i < bias.shape.v ? bias.data[i] : 0.0f; - } - } - // else - // { - // half *gpu_data = reinterpret_cast<half *>(desc.data.data()); - // for (int i = 0; i < aligned_channels; ++i) - // { - // gpu_data[i] = i < bias.shape.v ? bias.data[i] : 0.0f; - // } - // } - args_.AddObject("biases", absl::make_unique<BufferDescriptor>(std::move(desc))); -} - -template <DataType T> void ConvPowerVR::UploadWeights(const InternalTensor<OHWI, T> &weights) -{ - const int dst_slices = AlignByN(DivideRoundUp(weights.shape.o, 4), conv_params_.block_size.w); - const int src_slices = DivideRoundUp(weights.shape.i, 4); - - const bool f32_weights = conv_params_.weights_data_type == DataType::FLOAT32; - const int float4_size = sizeof(float4); - // TODO - // f32_weights ? sizeof(float4) : sizeof(half4); - - const int elements_count = weights.shape.h * weights.shape.w * src_slices * dst_slices * 4; - - std::vector<uint8_t> data(float4_size * elements_count); - - if (f32_weights) - { - float4 *ptr = reinterpret_cast<float4 *>(data.data()); - if (conv_params_.AreWeightsBuffer()) - { - RearrangeWeightsToOHWIOGroupI4O4(weights, conv_params_.block_size.w, - absl::MakeSpan(ptr, elements_count)); - } - else - { - RearrangeWeightsToI4HWIOOGroupO4(weights, conv_params_.block_size.w, - absl::MakeSpan(ptr, elements_count)); - } - } - // else - // { - // half4 *ptr = reinterpret_cast<half4 *>(data.data()); - // if (conv_params_.AreWeightsBuffer()) - // { - // RearrangeWeightsToOHWIOGroupI4O4(weights, conv_params_.block_size.w, - // absl::MakeSpan(ptr, elements_count)); - // } - // else - // { - // RearrangeWeightsToI4HWIOOGroupO4(weights, conv_params_.block_size.w, - // absl::MakeSpan(ptr, elements_count)); - // } - // } - if (conv_params_.AreWeightsBuffer()) - { - BufferDescriptor desc; - desc.element_type = conv_params_.weights_data_type; - desc.element_size = 4; - desc.memory_type = - conv_params_.weights_upload_type == ConvPowerVR::WeightsUploadType::CONSTANT_MEM - ? 
MemoryType::CONSTANT - : MemoryType::GLOBAL; - desc.size = float4_size * elements_count; - desc.data = std::move(data); - args_.AddObject("weights", absl::make_unique<BufferDescriptor>(std::move(desc))); - } - else - { - const int texture_width = dst_slices; - const int texture_height = src_slices * weights.shape.h * weights.shape.w; - const int sub_size = float4_size * texture_width * texture_height; - for (int i = 0; i < 4; ++i) - { - Texture2DDescriptor desc; - desc.element_type = conv_params_.weights_data_type; - desc.size = int2(texture_width, texture_height); - desc.data.resize(sub_size); - std::memcpy(desc.data.data(), data.data() + sub_size * i, sub_size); - const std::string name = "weights" + std::to_string(i); - args_.AddObject(name, absl::make_unique<Texture2DDescriptor>(std::move(desc))); - } - } -} - -template <DataType T> void ConvPowerVR::UploadWeights(const InternalTensor<OHWDI, T> &weights) -{ - const int block_size = conv_params_.block_size.w; - const int dst_slices = AlignByN(DivideRoundUp(weights.shape.o, 4), block_size); - const int src_slices = DivideRoundUp(weights.shape.i, 4); - - const int elements_count = - weights.shape.d * weights.shape.h * weights.shape.w * src_slices * dst_slices * 4; - const bool f32_weights = definition_.precision == CalculationsPrecision::F32; - - const int float4_size = f32_weights ? 16 : 8; - - std::vector<uint8_t> data(float4_size * elements_count); - - if (f32_weights) - { - float4 *ptr = reinterpret_cast<float4 *>(data.data()); - if (conv_params_.AreWeightsBuffer()) - { - RearrangeWeightsToODHWIOGroupI4O4(weights, conv_params_.block_size.w, - absl::MakeSpan(ptr, elements_count)); - } - else - { - RearrangeWeightsToI4DHWIOOGroupO4(weights, conv_params_.block_size.w, - absl::MakeSpan(ptr, elements_count)); - } - } - // else - // { - // half4 *ptr = reinterpret_cast<half4 *>(data.data()); - // if (conv_params_.AreWeightsBuffer()) - // { - // RearrangeWeightsToODHWIOGroupI4O4(weights, conv_params_.block_size.w, - // absl::MakeSpan(ptr, elements_count)); - // } - // else - // { - // RearrangeWeightsToI4DHWIOOGroupO4(weights, conv_params_.block_size.w, - // absl::MakeSpan(ptr, elements_count)); - // } - // } - - if (conv_params_.AreWeightsBuffer()) - { - BufferDescriptor desc; - desc.element_type = f32_weights ? DataType::FLOAT32 : DataType::FLOAT16; - desc.element_size = 4; - desc.size = float4_size * elements_count; - desc.data = std::move(data); - args_.AddObject("weights", absl::make_unique<BufferDescriptor>(std::move(desc))); - } - else - { - const int texture_width = dst_slices; - const int texture_height = src_slices * weights.shape.d * weights.shape.h * weights.shape.w; - int sub_size = float4_size * texture_width * texture_height; - for (int i = 0; i < 4; ++i) - { - Texture2DDescriptor desc; - desc.element_type = f32_weights ? 
DataType::FLOAT32 : DataType::FLOAT16; - desc.size = int2(texture_width, texture_height); - desc.data.resize(sub_size); - memcpy(desc.data.data(), data.data() + sub_size * i, sub_size); - const std::string name = "weights" + std::to_string(i); - args_.AddObject(name, absl::make_unique<Texture2DDescriptor>(std::move(desc))); - } - } -} - -ConvPowerVR CreateConvPowerVR(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution2DAttributes &attr, const BHWC *dst_shape = nullptr); - -ConvPowerVR CreateConvPowerVR(const DeviceInfo &device_info, const OperationDef &definition, - const FullyConnectedAttributes &attr, - const BHWC *dst_shape = nullptr); - -ConvPowerVR CreateConvPowerVRDynamicWeights(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC &weights_shape, - const BHWC *dst_shape = nullptr); - -ConvPowerVR CreateConvPowerVRWino4x4To6x6(const DeviceInfo &device_info, - const OperationDef &definition, - const Convolution2DAttributes &attr, - const BHWC *dst_shape = nullptr); - -ConvPowerVR CreateConvPowerVR3D(const DeviceInfo &device_info, const OperationDef &definition, - const Convolution3DAttributes &attr, - const BHWDC *dst_shape = nullptr); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_POWERVR_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvWeightsConverter.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvWeightsConverter.cc deleted file mode 100644 index 95172bd05..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvWeightsConverter.cc +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "open_cl/kernels/ConvWeightsConverter.h" - -#include <string> - -#include "open_cl/kernels/Util.h" -#include "open_cl/kernels/WorkGroupPicking.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -ConverterToConvWeights::ConverterToConvWeights(const OperationDef &definition, - const ConvWeightsDescription &conv_weights_desc) - : GPUOperation(definition), conv_weights_desc_(conv_weights_desc) -{ - code_ = GetConverterToConvWeightsCode(definition_, conv_weights_desc_); -} - -ConverterToConvWeights::ConverterToConvWeights(ConverterToConvWeights &&operation) - : GPUOperation(std::move(operation)), conv_weights_desc_(operation.conv_weights_desc_) -{ -} - -ConverterToConvWeights &ConverterToConvWeights::operator=(ConverterToConvWeights &&operation) -{ - if (this != &operation) - { - conv_weights_desc_ = operation.conv_weights_desc_; - GPUOperation::operator=(std::move(operation)); - } - return *this; -} - -std::string ConverterToConvWeights::GetConverterToConvWeightsCode( - const OperationDef &op_def, const ConvWeightsDescription &conv_weights_desc) -{ - AddSrcTensor("src_tensor", op_def.src_tensors[0]); - AddDstTensor("dst_tensor", op_def.dst_tensors[0]); - args_.AddFloat("mask_x"); - args_.AddFloat("mask_y"); - args_.AddFloat("mask_z"); - args_.AddFloat("mask_w"); - - std::string c = GetCommonDefines(op_def.precision); - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - c += " int GROUP_SIZE = " + std::to_string(conv_weights_desc.output_group_size) + ";\n"; - c += " int O = get_global_id(0) * 4;\n"; - c += " int I = get_global_id(1);\n"; - c += " int Z = get_global_id(2);\n"; - c += " int W = Z % args.src_tensor.Width();\n"; - c += " int H = Z / args.src_tensor.Width();\n"; - c += " if (O >= args.src_tensor.Batch() || I >= args.src_tensor.Slices() || " - "H >= args.src_tensor.Height()) return;\n"; - c += " FLT4 v0 = args.src_tensor.Read(W, H, I, O + 0);\n"; - c += " FLT4 v1 = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n"; - c += " FLT4 v2 = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n"; - c += " FLT4 v3 = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n"; - c += " if (O + 1 < args.src_tensor.Batch()) {\n"; - c += " v1 = args.src_tensor.Read(W, H, I, O + 1);\n"; - c += " }\n"; - c += " if (O + 2 < args.src_tensor.Batch()) {\n"; - c += " v2 = args.src_tensor.Read(W, H, I, O + 2);\n"; - c += " }\n"; - c += " if (O + 3 < args.src_tensor.Batch()) {\n"; - c += " v3 = args.src_tensor.Read(W, H, I, O + 3);\n"; - c += " }\n"; - c += " if (I == args.src_tensor.Slices() - 1) {\n"; - c += " FLT4 mask = (FLT4)(args.mask_x, args.mask_y, args.mask_z, " - "args.mask_w);\n"; - c += " v0 *= mask;\n"; - c += " v1 *= mask;\n"; - c += " v2 *= mask;\n"; - c += " v3 *= mask;\n"; - c += " }\n"; - c += " FLT4 r0 = (FLT4)(v0.x, v1.x, v2.x, v3.x);\n"; - c += " FLT4 r1 = (FLT4)(v0.y, v1.y, v2.y, v3.y);\n"; - c += " FLT4 r2 = (FLT4)(v0.z, v1.z, v2.z, v3.z);\n"; - c += " FLT4 r3 = (FLT4)(v0.w, v1.w, v2.w, v3.w);\n"; - c += " int d_index = O / (GROUP_SIZE * 4);\n"; - c += " int k_index = (O % (GROUP_SIZE * 4)) / 4;\n"; - c += " int dst_offset = (((d_index * args.src_tensor.Height() + H) * " - "args.src_tensor.Width() + W) * " - "args.src_tensor.Slices() + I) * GROUP_SIZE + " - "k_index;\n"; - c += " int address0 = dst_offset * 4 + 0;\n"; - c += " int address1 = dst_offset * 4 + 1;\n"; - c += " int address2 = dst_offset * 4 + 2;\n"; - c += " int address3 = dst_offset * 4 + 3;\n"; - c += " args.dst_tensor.WriteLinear(r0, dst_offset * 4 + 0)\n;"; - c += " args.dst_tensor.WriteLinear(r1, dst_offset * 4 + 1)\n;"; - c += " 
args.dst_tensor.WriteLinear(r2, dst_offset * 4 + 2)\n;"; - c += " args.dst_tensor.WriteLinear(r3, dst_offset * 4 + 3)\n;"; - c += "}\n"; - return c; -} - -absl::Status ConverterToConvWeights::BindArguments(ArgumentsBinder *args) -{ - float4 mask = GetMaskForLastPlane(src_[0]->Channels()); - RETURN_IF_ERROR(args->SetFloat("mask_x", mask.x)); - RETURN_IF_ERROR(args->SetFloat("mask_y", mask.y)); - RETURN_IF_ERROR(args->SetFloat("mask_z", mask.z)); - return args->SetFloat("mask_w", mask.w); -} - -int3 ConverterToConvWeights::GetGridSize() const -{ - const int grid_x = - DivideRoundUp(AlignByN(src_[0]->Batch(), 4 * conv_weights_desc_.output_group_size), 4); - const int grid_y = src_[0]->Slices(); - const int grid_z = src_[0]->Width() * src_[0]->Height(); - return int3(grid_x, grid_y, grid_z); -} - -ConverterToConvWeights CreateConverterToConvWeights(const OperationDef &definition, - const ConvWeightsDescription &conv_weights_desc) -{ - return ConverterToConvWeights(definition, conv_weights_desc); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvWeightsConverter.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvWeightsConverter.h deleted file mode 100644 index bb68977eb..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/ConvWeightsConverter.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_WEIGHTS_CONVERTER_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_WEIGHTS_CONVERTER_H__ - -#include "open_cl/ClCommandQueue.h" -#include "open_cl/ClKernel.h" -#include "open_cl/kernels/ConvCommon.h" -#include "open_cl/kernels/GpuOperation.h" -#include "open_cl/Status.h" -#include "open_cl/Types.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -class ConverterToConvWeights : public GPUOperation -{ -public: - ConverterToConvWeights(const OperationDef &definition, - const ConvWeightsDescription &conv_weights_desc); - absl::Status BindArguments(ArgumentsBinder *args) override; - int3 GetGridSize() const override; - - // Move only - ConverterToConvWeights(ConverterToConvWeights &&operation); - ConverterToConvWeights &operator=(ConverterToConvWeights &&operation); - ConverterToConvWeights(const ConverterToConvWeights &) = delete; - ConverterToConvWeights &operator=(const ConverterToConvWeights &) = delete; - -private: - std::string GetConverterToConvWeightsCode(const OperationDef &op_def, - const ConvWeightsDescription &conv_weights_desc); - - ConvWeightsDescription conv_weights_desc_; -}; - -// We expect src BHWC tensor and we assume that B is O, H = H, W = W, C is I -// as dst we expect Tensor with storage type BUFFER and -// dst.b * dst.h * dst.w * dst.c = AlignByN(src.b, 4) * src.h * src.w -// AlignByN(src.c, 4) -ConverterToConvWeights -CreateConverterToConvWeights(const OperationDef &definition, - const ConvWeightsDescription &conv_weights_desc); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONV_WEIGHTS_CONVERTER_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Converter.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Converter.cc deleted file mode 100644 index cc2bc41d4..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Converter.cc +++ /dev/null @@ -1,592 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "Converter.h" - -#include <algorithm> -#include <array> -#include <string> - -#include "open_cl/Arguments.h" -#include "open_cl/ClCommandQueue.h" -#include "open_cl/ClErrors.h" -#include "open_cl/kernels/Util.h" -#include "open_cl/Precision.h" -#include "open_cl/InternalTensor.h" -#include "open_cl/TensorType.h" -#include "open_cl/TensorTypeUtil.h" -#include "open_cl/Util.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -class OpenClConverterImpl : public TensorObjectConverter -{ -public: - virtual absl::Status Init(const TensorObjectDef &input_def, const TensorObjectDef &output_def, - Environment *environment) = 0; - -protected: - absl::Status DispatchKernel(cl_mem buffer_mem, Tensor *tensor) - { - kernel_.ResetBindingCounter(); - RETURN_IF_ERROR(kernel_.SetMemoryAuto(buffer_mem)); - RETURN_IF_ERROR(args_.SetObjectRef("tensor", tensor)); - RETURN_IF_ERROR(args_.Bind(kernel_.kernel(), kernel_.GetBindingCounter())); - const int3 grid = int3(tensor->Width() * tensor->Batch(), tensor->Height(), tensor->Slices()); - const int3 work_group_size = {16, 8, 1}; - const int3 work_groups_count = GetWorkGroupsCount(grid, work_group_size); - return queue_->Dispatch(kernel_, work_groups_count, work_group_size); - } - - Arguments args_; - BHWC shape_; - CLKernel kernel_; - TensorDescriptor tensor_descriptor_; - CLCommandQueue *queue_ = nullptr; - const CLContext *context_ = nullptr; -}; - -bool IsSupportedDataType(DataType type) -{ - return type == DataType::FLOAT16 || type == DataType::FLOAT32; -} - -bool IsBHWCOpenCLBuffer(const ObjectDef &def) -{ - return IsSupportedDataType(def.data_type) && def.object_type == ObjectType::OPENCL_BUFFER && - def.data_layout == DataLayout::BHWC; -} - -bool IsOpenCLTensor(const ObjectDef &def) -{ - const bool is_buffer_tensor = - def.object_type == ObjectType::OPENCL_BUFFER && def.data_layout == DataLayout::DHWC4; - const bool is_image2d_tensor = - def.object_type == ObjectType::OPENCL_TEXTURE && def.data_layout == DataLayout::HDWC4; - const bool is_image2d_array_tensor = - def.object_type == ObjectType::OPENCL_TEXTURE && def.data_layout == DataLayout::DHWC4; - const bool is_single_image_tensor = - def.object_type == ObjectType::OPENCL_TEXTURE && def.data_layout == DataLayout::BHWC; - return IsSupportedDataType(def.data_type) && (is_buffer_tensor || is_image2d_tensor || - is_image2d_array_tensor || is_single_image_tensor); -} - -absl::Status GetOpenCLMemory(const TensorObject &obj, cl_mem *memory) -{ - auto texture = absl::get_if<OpenClTexture>(&obj); - auto buffer = absl::get_if<OpenClBuffer>(&obj); - if (texture && texture->memobj) - { - *memory = texture->memobj; - } - else if (buffer && buffer->memobj) - { - *memory = buffer->memobj; - } - else - { - return absl::InvalidArgumentError("Missing OpenCL object."); - } - return absl::OkStatus(); -} - -// Implements conversion from OpenCL tensor to another OpenCL tensor. 
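A minimal usage sketch of the converter interface assembled in this file, assuming an already-initialized Environment and two valid cl_mem handles (src_mem and dst_mem are hypothetical names, and the Dimensions constructor call is an assumption); converters are obtained through NewConverterBuilder(), declared at the end of this file:

// Describe both endpoints: a FLOAT32 DHWC4 OpenCL buffer and a FLOAT32 DHWC4
// OpenCL texture with identical dimensions, so the tensor-to-tensor path applies.
TensorObjectDef src_def;
src_def.object_def.data_type = DataType::FLOAT32;
src_def.object_def.object_type = ObjectType::OPENCL_BUFFER;
src_def.object_def.data_layout = DataLayout::DHWC4;
src_def.dimensions = Dimensions(/*b=*/1, /*h=*/8, /*w=*/8, /*c=*/16); // assumed ctor

TensorObjectDef dst_def = src_def;
dst_def.object_def.object_type = ObjectType::OPENCL_TEXTURE;

auto builder = NewConverterBuilder(&environment);
std::unique_ptr<TensorObjectConverter> converter;
if (builder->IsSupported(src_def, dst_def) &&
    builder->MakeConverter(src_def, dst_def, &converter).ok())
{
  // Aggregate initialization of OpenClBuffer/OpenClTexture is assumed here;
  // both wrap a single cl_mem memobj.
  absl::Status st = converter->Convert(OpenClBuffer{src_mem}, OpenClTexture{dst_mem});
}

The builder walks the IsSupported() predicates in order and instantiates the first converter class that accepts the pair, as shown in OpenClTensorConverterBuilder::MakeConverter below.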
-class TensorToTensorConverter : public OpenClConverterImpl -{ -public: - static bool IsSupported(const ObjectDef &input, const ObjectDef &output) - { - return IsOpenCLTensor(input) && IsOpenCLTensor(output); - } - - absl::Status Init(const TensorObjectDef &input_def, const TensorObjectDef &output_def, - Environment *environment) final - { - src_tensor_descriptor_.layout = Layout::BHWC; - src_tensor_descriptor_.storage_type = - ToTensorStorageType(input_def.object_def.object_type, input_def.object_def.data_layout); - src_tensor_descriptor_.data_type = input_def.object_def.data_type; - args_.AddObjectRef("src_tensor", AccessType::READ, - absl::make_unique<TensorDescriptor>(src_tensor_descriptor_)); - - dst_tensor_descriptor_.layout = Layout::BHWC; - dst_tensor_descriptor_.storage_type = - ToTensorStorageType(output_def.object_def.object_type, output_def.object_def.data_layout); - dst_tensor_descriptor_.data_type = output_def.object_def.data_type; - args_.AddObjectRef("dst_tensor", AccessType::WRITE, - absl::make_unique<TensorDescriptor>(dst_tensor_descriptor_)); - - const bool need_fp16_support = input_def.object_def.data_type == DataType::FLOAT16 || - output_def.object_def.data_type == DataType::FLOAT16; - const std::string out_data_type = ToCLDataType(output_def.object_def.data_type); - std::string shader_src; - if (need_fp16_support) - { - shader_src += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; - } - shader_src += - R"(__kernel void tensor_to_tensor($0) { - int linear_id = get_global_id(0); - int x = linear_id / args.dst_tensor.Batch(); - int b = linear_id % args.dst_tensor.Batch(); - int y = get_global_id(1); - int d = get_global_id(2); - if (x >= args.dst_tensor.Width() || y >= args.dst_tensor.Height() || d >= args.dst_tensor.Slices()) return; -)"; - shader_src += - " " + out_data_type + "4 input = args.src_tensor.Read<" + out_data_type + ">(x, y, d, b);\n"; - shader_src += " args.dst_tensor.Write(input, x, y, d, b);\n}"; - queue_ = environment->queue(); - context_ = &environment->context(); - shape_ = BHWC(input_def.dimensions.b, input_def.dimensions.h, input_def.dimensions.w, - input_def.dimensions.c); - RETURN_IF_ERROR(args_.TransformToCLCode(environment->device().info_, {}, &shader_src)); - return environment->program_cache()->GetOrCreateCLKernel( - shader_src, "tensor_to_tensor", environment->context(), environment->device(), &kernel_); - } - - absl::Status Convert(const TensorObject &input_obj, const TensorObject &output_obj) override - { - cl_mem in_memory = nullptr; - RETURN_IF_ERROR(GetOpenCLMemory(input_obj, &in_memory)); - cl_mem out_memory = nullptr; - RETURN_IF_ERROR(GetOpenCLMemory(output_obj, &out_memory)); - - Tensor src_tensor; - RETURN_IF_ERROR( - CreateSharedTensor(*context_, in_memory, shape_, src_tensor_descriptor_, &src_tensor)); - Tensor dst_tensor; - RETURN_IF_ERROR( - CreateSharedTensor(*context_, out_memory, shape_, dst_tensor_descriptor_, &dst_tensor)); - - RETURN_IF_ERROR(args_.SetObjectRef("src_tensor", &src_tensor)); - RETURN_IF_ERROR(args_.SetObjectRef("dst_tensor", &dst_tensor)); - - RETURN_IF_ERROR(args_.Bind(kernel_.kernel())); - const int3 grid = - int3(dst_tensor.Width() * dst_tensor.Batch(), dst_tensor.Height(), dst_tensor.Slices()); - const int3 work_group_size = {16, 8, 1}; - const int3 work_groups_count = GetWorkGroupsCount(grid, work_group_size); - return queue_->Dispatch(kernel_, work_groups_count, work_group_size); - } - -private: - TensorDescriptor src_tensor_descriptor_; - TensorDescriptor dst_tensor_descriptor_; -}; - -// 
Implements conversion from OpenCL-specific tensor layout to BHWC OpenCL -// buffer. -class TensorToBHWCBufferConverter : public OpenClConverterImpl -{ -public: - static bool IsSupported(const ObjectDef &input, const ObjectDef &output) - { - return IsOpenCLTensor(input) && IsBHWCOpenCLBuffer(output); - } - - absl::Status Init(const TensorObjectDef &input_def, const TensorObjectDef &output_def, - Environment *environment) final - { - TensorStorageType src_tensor_type = - ToTensorStorageType(input_def.object_def.object_type, input_def.object_def.data_layout); - tensor_descriptor_.layout = Layout::BHWC; - tensor_descriptor_.storage_type = src_tensor_type; - tensor_descriptor_.data_type = input_def.object_def.data_type; - args_.AddObjectRef("tensor", AccessType::READ, - absl::make_unique<TensorDescriptor>(tensor_descriptor_)); - - const bool need_fp16_support = input_def.object_def.data_type == DataType::FLOAT16 || - output_def.object_def.data_type == DataType::FLOAT16; - std::string shader_src; - if (need_fp16_support) - { - shader_src += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; - } - const std::string out_data_type = ToCLDataType(output_def.object_def.data_type); - shader_src += "__kernel void tensor_to_bhwc("; - shader_src += "__global " + out_data_type + "* dst, $0) {\n"; - shader_src += R"( int linear_id = get_global_id(0); - int x = linear_id / args.tensor.Batch(); - int b = linear_id % args.tensor.Batch(); - int y = get_global_id(1); - int d = get_global_id(2); - if (x >= args.tensor.Width() || y >= args.tensor.Height() || d >= args.tensor.Slices()) return; -)"; - shader_src += - " " + out_data_type + "4 input = args.tensor.Read<" + out_data_type + ">(x, y, d, b);\n"; - shader_src += R"( int c = d * 4; - int index = ((b * args.tensor.Height() + y) * args.tensor.Width() + x) * args.tensor.Channels() + c; - - dst[index] = input.x; - if (c + 1 < args.tensor.Channels()) { - dst[index + 1] = input.y; - } - if (c + 2 < args.tensor.Channels()) { - dst[index + 2] = input.z; - } - if (c + 3 < args.tensor.Channels()) { - dst[index + 3] = input.w; - } -})"; - queue_ = environment->queue(); - context_ = &environment->context(); - shape_ = BHWC(input_def.dimensions.b, input_def.dimensions.h, input_def.dimensions.w, - input_def.dimensions.c); - RETURN_IF_ERROR(args_.TransformToCLCode(environment->device().info_, {}, &shader_src)); - return environment->program_cache()->GetOrCreateCLKernel( - shader_src, "tensor_to_bhwc", environment->context(), environment->device(), &kernel_); - } - - absl::Status Convert(const TensorObject &input_obj, const TensorObject &output_obj) override - { - auto output = absl::get_if<OpenClBuffer>(&output_obj); - if (!output || !output->memobj) - { - return absl::InvalidArgumentError("Missing output in tensor_to_bhwc converter"); - } - - cl_mem in_memory = nullptr; - RETURN_IF_ERROR(GetOpenCLMemory(input_obj, &in_memory)); - Tensor tensor; - RETURN_IF_ERROR(CreateSharedTensor(*context_, in_memory, shape_, tensor_descriptor_, &tensor)); - return DispatchKernel(output->memobj, &tensor); - } -}; - -// Implements conversion from BHWC OpenCL buffer to OpenCL-specific tensor -// layout. 
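The address arithmetic shared by this converter and the previous one can be read directly off the kernel strings above; a plain C++ restatement, for illustration only:

// BHWC linear offset of element (b, y, x, c), exactly as the kernels compute it:
inline int BhwcIndex(int H, int W, int C, int b, int y, int x, int c)
{
  return ((b * H + y) * W + x) * C + c;
}
// On the tensor side, channels are packed four per slice: channel c lives in
// slice d = c / 4, in component c % 4 of the FLT4 value. That packing is why
// the kernels guard the channel tail with the c + 1 < Channels(),
// c + 2 < Channels(), c + 3 < Channels() checks.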
-class BHWCBufferToTensorConverter : public OpenClConverterImpl -{ -public: - static bool IsSupported(const ObjectDef &input, const ObjectDef &output) - { - return IsBHWCOpenCLBuffer(input) && IsOpenCLTensor(output); - } - - std::pair<std::string, std::string> GetFromBhwcKernel(const TensorObjectDef &input_def, - const TensorObjectDef &) const - { - return std::make_pair("__global " + ToCLDataType(input_def.object_def.data_type) + "* src", - R"(int c = d * 4; - int index = ((b * args.tensor.Height() + y) * args.tensor.Width() + x) * args.tensor.Channels() + c; - result.x = src[index]; - result.y = c + 1 < args.tensor.Channels() ? src[index + 1] : 1; - result.z = c + 2 < args.tensor.Channels() ? src[index + 2] : 2; - result.w = c + 3 < args.tensor.Channels() ? src[index + 3] : 3; -)"); - } - - absl::Status Init(const TensorObjectDef &input_def, const TensorObjectDef &output_def, - Environment *environment) final - { - auto params_kernel = GetFromBhwcKernel(input_def, output_def); - - TensorStorageType dst_tensor_type = - ToTensorStorageType(output_def.object_def.object_type, output_def.object_def.data_layout); - tensor_descriptor_.layout = Layout::BHWC; - tensor_descriptor_.storage_type = dst_tensor_type; - tensor_descriptor_.data_type = output_def.object_def.data_type; - args_.AddObjectRef("tensor", AccessType::WRITE, - absl::make_unique<TensorDescriptor>(tensor_descriptor_)); - - const bool need_fp16_support = input_def.object_def.data_type == DataType::FLOAT16 || - output_def.object_def.data_type == DataType::FLOAT16; - std::string shader_src; - if (need_fp16_support) - { - shader_src += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; - } - const std::string in_data_type = ToCLDataType(input_def.object_def.data_type); - const std::string out_data_type = ToCLDataType(output_def.object_def.data_type); - shader_src += "__kernel void bhwc_to_tensor("; - shader_src += "__global " + in_data_type + "* src, $0) {\n"; - - shader_src += R"( int linear_id = get_global_id(0); - int x = linear_id / args.tensor.Batch(); - int b = linear_id % args.tensor.Batch(); - int y = get_global_id(1); - int d = get_global_id(2); - - if (x >= args.tensor.Width() || y >= args.tensor.Height() || d >= args.tensor.Slices()) return; -)"; - shader_src += " " + out_data_type + "4 result;\n"; - shader_src += R"( int c = d * 4; - int index = ((b * args.tensor.Height() + y) * args.tensor.Width() + x) * args.tensor.Channels() + c; - result.x = src[index]; - result.y = c + 1 < args.tensor.Channels() ? src[index + 1] : 1; - result.z = c + 2 < args.tensor.Channels() ? src[index + 2] : 2; - result.w = c + 3 < args.tensor.Channels() ? 
src[index + 3] : 3; -)"; - shader_src += " args.tensor.Write(result, x, y, d, b);\n}"; - queue_ = environment->queue(); - context_ = &environment->context(); - shape_ = BHWC(output_def.dimensions.b, output_def.dimensions.h, output_def.dimensions.w, - output_def.dimensions.c); - RETURN_IF_ERROR(args_.TransformToCLCode(environment->device().info_, {}, &shader_src)); - return environment->program_cache()->GetOrCreateCLKernel( - shader_src, "bhwc_to_tensor", environment->context(), environment->device(), &kernel_); - } - - absl::Status Convert(const TensorObject &input_obj, const TensorObject &output_obj) override - { - auto input = absl::get_if<OpenClBuffer>(&input_obj); - if (!input || !input->memobj) - { - return absl::InvalidArgumentError("Missing input in bhwc_to_tensor converter"); - } - cl_mem out_memory = nullptr; - RETURN_IF_ERROR(GetOpenCLMemory(output_obj, &out_memory)); - Tensor tensor; - RETURN_IF_ERROR(CreateSharedTensor(*context_, out_memory, shape_, tensor_descriptor_, &tensor)); - return DispatchKernel(input->memobj, &tensor); - } -}; - -std::array<size_t, 3> CalculateTextureRegion(const TensorObjectDef &def) -{ - const auto &dims = def.dimensions; - std::array<size_t, 3> region = {0, 0, 1}; - switch (ToTensorStorageType(def.object_def.object_type, def.object_def.data_layout)) - { - case TensorStorageType::SINGLE_TEXTURE_2D: - region[0] = static_cast<size_t>(dims.w * dims.b); - region[1] = static_cast<size_t>(dims.h); - break; - case TensorStorageType::TEXTURE_2D: - region[0] = static_cast<size_t>(dims.w * dims.b); - region[1] = static_cast<size_t>(dims.h * dims.d()); - break; - case TensorStorageType::TEXTURE_ARRAY: - region[0] = static_cast<size_t>(dims.w * dims.b); - region[1] = static_cast<size_t>(dims.h); - region[2] = static_cast<size_t>(dims.d()); - break; - default: - break; - } - return region; -} - -bool IsOpenClTextureOrBuffer(ObjectType type) -{ - return type == ObjectType::OPENCL_BUFFER || type == ObjectType::OPENCL_TEXTURE; -} - -// Copies data from one object of the same type and layout to another object. 
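The buffer path of this copier moves a whole tensor with a single clEnqueueCopyBuffer call; the byte count in Copy() mirrors the slice-aligned storage layout described above. A worked instance of that size expression:

// bytes = SizeOf(data_type) * shape.w * shape.h * AlignByN(shape.c, 4) * shape.b
// For FLOAT32 with BHWC = (1, 8, 8, 3), channels align up from 3 to 4, so the
// enqueued copy covers 4 * 8 * 8 * 4 * 1 = 1024 bytes.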
-class TrivialCopier : public OpenClConverterImpl -{ -public: - static bool IsSupported(const ObjectDef &input, const ObjectDef &output) - { - return IsOpenClTextureOrBuffer(input.object_type) && input.data_type == output.data_type && - input.object_type == output.object_type && input.data_layout == output.data_layout; - } - - absl::Status Init(const TensorObjectDef &input_def, const TensorObjectDef &output_def, - Environment *environment) final - { - shape_ = BHWC(input_def.dimensions.b, input_def.dimensions.h, input_def.dimensions.w, - input_def.dimensions.c); - data_type_ = input_def.object_def.data_type; - queue_ = environment->queue(); - region_ = CalculateTextureRegion(output_def); - return absl::OkStatus(); - } - - absl::Status Convert(const TensorObject &input_obj, const TensorObject &output_obj) override - { - auto texture_input = absl::get_if<OpenClTexture>(&input_obj); - auto texture_output = absl::get_if<OpenClTexture>(&output_obj); - if (texture_input && texture_output) - { - return Copy(*texture_input, *texture_output); - } - auto buffer_input = absl::get_if<OpenClBuffer>(&input_obj); - auto buffer_output = absl::get_if<OpenClBuffer>(&output_obj); - if (buffer_input && buffer_output) - { - return Copy(*buffer_input, *buffer_output); - } - return absl::InternalError("Unexpected object"); - } - - absl::Status Copy(const OpenClBuffer &input, const OpenClBuffer &output) - { - if (input.memobj == output.memobj) - { - return absl::OkStatus(); - } - return GetOpenCLError(clEnqueueCopyBuffer(queue_->queue(), input.memobj, output.memobj, 0, 0, - SizeOf(data_type_) * shape_.w * shape_.h * - AlignByN(shape_.c, 4) * shape_.b, - 0, nullptr, nullptr)); - } - - absl::Status Copy(const OpenClTexture &input, const OpenClTexture &output) - { - if (input.memobj == output.memobj) - { - return absl::OkStatus(); - } - size_t origin[3] = {0, 0, 0}; - return GetOpenCLError(clEnqueueCopyImage(queue_->queue(), input.memobj, output.memobj, origin, - origin, region_.data(), 0, nullptr, nullptr)); - } - -private: - DataType data_type_ = DataType::UNKNOWN; - std::array<size_t, 3> region_; -}; - -// Copies data from/to CPU into a tensor. -class CpuCopier : public OpenClConverterImpl -{ -public: - static bool IsSupported(const ObjectDef &input, const ObjectDef &output) - { - return input.data_type == output.data_type && input.data_layout == output.data_layout && - ((input.object_type == ObjectType::CPU_MEMORY && - IsOpenClTextureOrBuffer(output.object_type)) || - (output.object_type == ObjectType::CPU_MEMORY && - IsOpenClTextureOrBuffer(input.object_type))); - } - - absl::Status Init(const TensorObjectDef &input_def, const TensorObjectDef &output_def, - Environment *environment) final - { - - region_ = CalculateTextureRegion( - input_def.object_def.object_type == ObjectType::CPU_MEMORY ? 
output_def : input_def); - queue_ = environment->queue(); - return absl::OkStatus(); - } - - absl::Status Convert(const TensorObject &input_obj, const TensorObject &output_obj) override - { - auto cpu_input = absl::get_if<CpuMemory>(&input_obj); - auto cpu_output = absl::get_if<CpuMemory>(&output_obj); - - if (cpu_input) - { - auto texture_output = absl::get_if<OpenClTexture>(&output_obj); - if (texture_output) - { - return queue_->EnqueueWriteImage(texture_output->memobj, - int3(region_[0], region_[1], region_[2]), cpu_input->data); - } - auto buffer_output = absl::get_if<OpenClBuffer>(&output_obj); - if (buffer_output) - { - return queue_->EnqueueWriteBuffer(buffer_output->memobj, cpu_input->size_bytes, - cpu_input->data); - } - } - else if (cpu_output) - { - auto texture_input = absl::get_if<OpenClTexture>(&input_obj); - if (texture_input) - { - return queue_->EnqueueReadImage(texture_input->memobj, - int3(region_[0], region_[1], region_[2]), cpu_output->data); - } - auto buffer_input = absl::get_if<OpenClBuffer>(&input_obj); - if (buffer_input) - { - return queue_->EnqueueReadBuffer(buffer_input->memobj, cpu_output->size_bytes, - cpu_output->data); - } - } - return absl::InternalError("Unexpected object"); - } - -private: - std::array<size_t, 3> region_; -}; - -class OpenClTensorConverterBuilder : public TensorObjectConverterBuilder -{ -public: - explicit OpenClTensorConverterBuilder(Environment *environment) : environment_(environment) {} - - bool IsSupported(const TensorObjectDef &input, const TensorObjectDef &output) const final - { - const auto &input_def = input.object_def; - const auto &output_def = output.object_def; - return input.dimensions == output.dimensions && - (TrivialCopier::IsSupported(input_def, output_def) || - TensorToTensorConverter::IsSupported(input_def, output_def) || - CpuCopier::IsSupported(input_def, output_def) || - TensorToBHWCBufferConverter::IsSupported(input_def, output_def) || - BHWCBufferToTensorConverter::IsSupported(input_def, output_def)); - } - - absl::Status MakeConverter(const TensorObjectDef &input, const TensorObjectDef &output, - std::unique_ptr<TensorObjectConverter> *converter) final - { - std::unique_ptr<OpenClConverterImpl> impl; - const auto &input_def = input.object_def; - const auto &output_def = output.object_def; - if (TrivialCopier::IsSupported(input_def, output_def)) - { - impl = absl::make_unique<TrivialCopier>(); - } - else if (TensorToTensorConverter::IsSupported(input_def, output_def)) - { - impl = absl::make_unique<TensorToTensorConverter>(); - } - else if (CpuCopier::IsSupported(input_def, output_def)) - { - impl = absl::make_unique<CpuCopier>(); - } - else if (TensorToBHWCBufferConverter::IsSupported(input_def, output_def)) - { - impl = absl::make_unique<TensorToBHWCBufferConverter>(); - } - else if (BHWCBufferToTensorConverter::IsSupported(input_def, output_def)) - { - impl = absl::make_unique<BHWCBufferToTensorConverter>(); - } - else - { - return absl::UnimplementedError("Unsupported conversion"); - } - RETURN_IF_ERROR(impl->Init(input, output, environment_)); - *converter = std::move(impl); - return absl::OkStatus(); - } - - Environment *environment_; -}; - -} // namespace - -std::unique_ptr<TensorObjectConverterBuilder> NewConverterBuilder(Environment *environment) -{ - return absl::make_unique<OpenClTensorConverterBuilder>(environment); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Converter.h 
b/runtime/onert/backend/gpu_cl/open_cl/kernels/Converter.h deleted file mode 100644 index d69ec85bb..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Converter.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONVERTER_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONVERTER_H__ - -#include <memory> - -#include "open_cl/Environment.h" -#include "open_cl/Spi.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -// Supports conversions from BHWC to internal OpenCL tensor representation and -// back. Also supports F16/F32. -std::unique_ptr<TensorObjectConverterBuilder> NewConverterBuilder(Environment *environment); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_CONVERTER_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv.cc deleted file mode 100644 index e409fef47..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv.cc +++ /dev/null @@ -1,382 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "DepthwiseConv.h" - -#include <string> -#include <utility> -#include <vector> - -#include "open_cl/ClDevice.h" -#include "open_cl/kernels/Util.h" -#include "open_cl/kernels/WorkGroupPicking.h" -#include "open_cl/LinearStorage.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -bool IsSpecializedCase(int channel_multiplier) -{ - return channel_multiplier == 1 || channel_multiplier == 2 || channel_multiplier == 4; -} - -std::string GetSrcValue(int channel_multiplier, const std::string coords) -{ - std::string c; - if (channel_multiplier == 1) - { - c += " FLT4 src_final = args.src_tensor.Read(" + coords + ", S);\n"; - } - else if (channel_multiplier == 2) - { - c += " int s_layer = S / 2;\n"; - c += " FLT4 src = args.src_tensor.Read(" + coords + ", s_layer);\n"; - c += " FLT2 t0 = S % 2 == 0 ? 
src.xy : src.zw;\n";
-    c += "  FLT4 src_final = (FLT4)(t0.x, t0.x, t0.y, t0.y);\n";
-  }
-  else if (channel_multiplier == 4)
-  {
-    c += "  int s_layer = S / 4;\n";
-    c += "  FLT4 src = args.src_tensor.Read(" + coords + ", s_layer);\n";
-    c += "  FLT t0 = src.x;\n";
-    c += "  int remainder = S % 4;\n";
-    c += "  if (remainder == 1) t0 = src.y;\n";
-    c += "  if (remainder == 2) t0 = src.z;\n";
-    c += "  if (remainder == 3) t0 = src.w;\n";
-    c += "  FLT4 src_final = (FLT4)(t0, t0, t0, t0);\n";
-  }
-  else
-  {
-    c += "  int s_layer = S / args.ch_multiplier;\n";
-    c += "  FLT4 src = args.src_tensor.Read(" + coords + ", s_layer);\n";
-    c += "  int s_offset = (S % args.ch_multiplier) * 4;\n";
-    c += "  FLT4 src_final;\n";
-    c += "  FLT temp_arr[4] = {src.x, src.y, src.z, src.w};\n";
-    c += "  src_final.x = temp_arr[(s_offset + 0) / args.ch_multiplier];\n";
-    c += "  src_final.y = temp_arr[(s_offset + 1) / args.ch_multiplier];\n";
-    c += "  src_final.z = temp_arr[(s_offset + 2) / args.ch_multiplier];\n";
-    c += "  src_final.w = temp_arr[(s_offset + 3) / args.ch_multiplier];\n";
-  }
-
-  return c;
-}
-
-std::string GenerateDepthwiseConvolutionCode(const OperationDef &op_def, bool stride_correction,
-                                             int channel_multiplier, bool weights_are_buffer,
-                                             bool dynamic_weights, GPUOperation *op)
-{
-  auto src_desc = op_def.src_tensors[0];
-  src_desc.SetTextureAddressMode(TextureAddressMode::ZERO);
-  if (op_def.IsBatchSupported())
-  {
-    src_desc.SetStateVar("BatchedWidth", "true");
-  }
-  op->AddSrcTensor("src_tensor", src_desc);
-  if (dynamic_weights)
-  {
-    op->AddSrcTensor("weights", op_def.src_tensors[1]);
-  }
-
-  auto dst_desc = op_def.dst_tensors[0];
-  if (op_def.IsBatchSupported())
-  {
-    dst_desc.SetStateVar("BatchedWidth", "true");
-  }
-  op->AddDstTensor("dst_tensor", dst_desc);
-
-  const auto src_tensor_type = op_def.src_tensors[0].storage_type;
-
-  std::string c = GetCommonDefines(op_def.precision);
-
-  const bool manual_clamp = src_tensor_type == TensorStorageType::BUFFER ||
-                            src_tensor_type == TensorStorageType::IMAGE_BUFFER;
-
-  c += "__kernel void main_function(\n";
-  c += "$0) {\n";
-  c += "  int X = get_global_id(0);\n";
-  if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
-  {
-    c += "  int linear_id_1 = get_global_id(1);\n";
-    c += "  int Y = linear_id_1 / args.dst_tensor.Depth();\n";
-    c += "  int Z = linear_id_1 % args.dst_tensor.Depth();\n";
-  }
-  else
-  {
-    c += "  int Y = get_global_id(1);\n";
-  }
-  c += "  int S = get_global_id(2);\n";
-  c += "  if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || "
-       "S >= args.dst_tensor.Slices()) { \n";
-  c += "    return; \n";
-  c += "  } \n";
-  c += "  ACCUM_FLT4 r = (ACCUM_FLT4)(0.0f, 0.0f, 0.0f, 0.0f);\n";
-  if (stride_correction)
-  {
-    c += "  int x_offseted = " +
-         GetXStrideCorrectedV2("X", "args.src_tensor.Batch()", "args.stride_x", "args.padding_x") +
-         ";\n";
-  }
-  else
-  {
-    if (op_def.IsBatchSupported())
-    {
-      c += "  int x_offseted = X * args.stride_x + args.padding_x * "
-           "args.src_tensor.Batch();\n";
-    }
-    else
-    {
-      c += "  int x_offseted = X * args.stride_x + args.padding_x;\n";
-    }
-  }
-  c += "  int y_offseted = Y * args.stride_y + args.padding_y;\n";
-  if (!dynamic_weights)
-  {
-    std::string weights_offset = "args.kernel_size_x * args.kernel_size_y";
-    if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH))
-    {
-      c += "  int z_offseted = Z * args.stride_z + args.padding_z;\n";
-      weights_offset += " * args.kernel_size_z";
-    }
-    if (weights_are_buffer)
-    {
-      c += "  int fx_c = S * " + weights_offset + ";\n";
-    }
-    else
-    {
-      c += "  int fx_c = 0;\n";
-    }
-  }
-
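// For reference, the generic GetSrcValue() path above works out as follows
// for ch_multiplier == 3 and output slice S == 4 (a hand-worked example, not
// emitted code):
//   s_layer  = 4 / 3 = 1
//   s_offset = (4 % 3) * 4 = 4
//   src_final = { temp_arr[(4 + 0) / 3], temp_arr[(4 + 1) / 3],
//                 temp_arr[(4 + 2) / 3], temp_arr[(4 + 3) / 3] }
//             = { src.y, src.y, src.z, src.z }
// i.e. each source channel is replicated ch_multiplier times along the output
// channel axis, which is the expected depthwise channel-multiplier layout.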
std::string kernel_size_x = dynamic_weights ? "args.weights.Width()" : "args.kernel_size_x"; - std::string kernel_size_y = dynamic_weights ? "args.weights.Height()" : "args.kernel_size_y"; - std::string kernel_size_z = dynamic_weights ? "args.weights.Depth()" : "args.kernel_size_z"; - - std::string flat_coords = "x_c, y_c"; - if (manual_clamp) - { - std::string check = "!outside_x && !outside_y"; - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) - { - check += " && !outside_z"; - flat_coords += ", z_c"; - c += " for (int kz = 0; kz < " + kernel_size_z + "; ++kz) {\n"; - c += " int z_c = z_offseted + kz * args.dilation_z;\n"; - c += " bool outside_z = z_c < 0 || z_c >= args.src_tensor.Depth();\n"; - } - c += " for (int ky = 0; ky < " + kernel_size_y + "; ++ky) {\n"; - c += " int y_c = y_offseted + ky * args.dilation_y;\n"; - c += " bool outside_y = y_c < 0 || y_c >= args.src_tensor.Height();\n"; - c += " for (int kx = 0; kx < " + kernel_size_x + "; ++kx) {\n"; - const std::string dilation_x = - op_def.IsBatchSupported() ? "args.dilation_x * args.src_tensor.Batch()" : "args.dilation_x"; - c += " int x_c = x_offseted + kx * " + dilation_x + ";\n"; - c += " bool outside_x = x_c < 0 || x_c >= args.src_tensor.Width();\n"; - c += " if (" + check + ") {\n"; - if (dynamic_weights) - { - c += " FLT4 f = args.weights.Read(kx, ky, S);\n"; - } - else - { - if (weights_are_buffer) - { - c += " FLT4 f = args.weights.Read(fx_c);\n"; - } - else - { - c += " FLT4 f = args.weights.Read(fx_c, S);\n"; - } - } - c += GetSrcValue(channel_multiplier, flat_coords); - c += " r += TO_ACCUM_TYPE(src_final * f);\n"; - c += " };\n"; - if (!dynamic_weights) - { - c += " fx_c++;\n"; - } - c += " }\n"; - c += " }\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) - { - c += " }\n"; - } - } - else - { // Texture types with ZERO clamping - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) - { - flat_coords += ", z_c"; - c += " for (int kz = 0; kz < " + kernel_size_z + "; ++kz) {\n"; - c += " int z_c = z_offseted + kz * args.dilation_z;\n"; - if (src_tensor_type != TensorStorageType::TEXTURE_3D) - { // Only TEXTURE_3D supports clamping - // in DEPTH dimension - c += " if (z_c < 0 || z_c >= args.src_tensor.Depth()) {\n"; - c += " fx_c += args.kernel_size_y * args.kernel_size_x;\n"; - c += " continue;\n"; - c += " }\n"; - } - } - c += " for (int ky = 0; ky < " + kernel_size_y + "; ++ky) {\n"; - c += " int y_c = y_offseted + ky * args.dilation_y;\n"; - c += " for (int kx = 0; kx < " + kernel_size_x + "; ++kx) {\n"; - const std::string dilation_x = - op_def.IsBatchSupported() ? 
"args.dilation_x * args.src_tensor.Batch()" : "args.dilation_x"; - c += " int x_c = x_offseted + kx * " + dilation_x + ";\n"; - c += GetSrcValue(channel_multiplier, flat_coords); - if (dynamic_weights) - { - c += " FLT4 f = args.weights.Read(kx, ky, S);\n"; - } - else - { - if (weights_are_buffer) - { - c += " FLT4 f = args.weights.Read(fx_c);\n"; - } - else - { - c += " FLT4 f = args.weights.Read(fx_c, S);\n"; - } - c += " fx_c++;\n"; - } - c += " r += TO_ACCUM_TYPE(src_final * f);\n"; - c += " }\n"; - c += " }\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) - { - c += " }\n"; - } - } - c += " FLT4 res0 = TO_FLT4(r) + args.biases.Read(S);\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) - { - c += " args.dst_tensor.Write(res0, X, Y, Z, S);\n"; - } - else - { - c += " args.dst_tensor.Write(res0, X, Y, S);\n"; - } - c += "}\n"; - - return c; -} -} // namespace - -GPUOperation CreateDepthwiseConvolution2D(const DeviceInfo &device_info, - const OperationDef &definition, - const DepthwiseConvolution2DAttributes &attr) -{ - bool weights_are_buffer = device_info.IsMali(); - GPUOperation op(definition); - op.args_.AddInt("kernel_size_x", attr.weights.shape.w); - op.args_.AddInt("stride_x", attr.strides.w); - op.args_.AddInt("padding_x", -attr.padding.prepended.w); - op.args_.AddInt("dilation_x", attr.dilations.w); - op.args_.AddInt("kernel_size_y", attr.weights.shape.h); - op.args_.AddInt("stride_y", attr.strides.h); - op.args_.AddInt("padding_y", -attr.padding.prepended.h); - op.args_.AddInt("dilation_y", attr.dilations.h); - if (!IsSpecializedCase(attr.weights.shape.o)) - { - op.args_.AddInt("ch_multiplier", attr.weights.shape.o); - } - const bool stride_correction = definition.IsBatchSupported() && attr.strides.w != 1; - op.code_ = GenerateDepthwiseConvolutionCode(definition, stride_correction, attr.weights.shape.o, - weights_are_buffer, false, &op); - UploadWeightsForDWConv2D(attr.weights, weights_are_buffer, definition.precision, &op); - op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ; - - TensorLinearDescriptor desc; - desc.storage_type = - weights_are_buffer ? LinearStorageType::BUFFER : LinearStorageType::TEXTURE_2D; - desc.element_type = definition.GetDataType(); - desc.UploadLinearData(attr.bias); - op.args_.AddObject("biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc))); - return op; -} - -GPUOperation -CreateDepthwiseConvolution2DDynamicWeights(const DeviceInfo &device_info, - const OperationDef &definition, - const DepthwiseConvolution2DAttributes &attr) -{ - GPUOperation op(definition); - op.args_.AddInt("stride_x", attr.strides.w); - op.args_.AddInt("padding_x", -attr.padding.prepended.w); - op.args_.AddInt("dilation_x", attr.dilations.w); - op.args_.AddInt("stride_y", attr.strides.h); - op.args_.AddInt("padding_y", -attr.padding.prepended.h); - op.args_.AddInt("dilation_y", attr.dilations.h); - const bool stride_correction = definition.IsBatchSupported() && attr.strides.w != 1; - op.code_ = GenerateDepthwiseConvolutionCode(definition, stride_correction, 1, false, true, &op); - op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ; - - TensorLinearDescriptor desc; - desc.storage_type = - device_info.IsMali() ? 
LinearStorageType::BUFFER : LinearStorageType::TEXTURE_2D; - desc.element_type = definition.GetDataType(); - desc.UploadLinearData(attr.bias); - op.args_.AddObject("biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc))); - return op; -} - -GPUOperation CreateDepthwiseConvolution3D(const DeviceInfo &device_info, - const OperationDef &definition, - const DepthwiseConvolution3DAttributes &attr) -{ - bool weights_are_buffer = device_info.IsMali(); - GPUOperation op(definition); - op.args_.AddInt("kernel_size_x", attr.weights.shape.w); - op.args_.AddInt("stride_x", attr.strides.w); - op.args_.AddInt("padding_x", -attr.padding.prepended.w); - op.args_.AddInt("dilation_x", attr.dilations.w); - op.args_.AddInt("kernel_size_y", attr.weights.shape.h); - op.args_.AddInt("stride_y", attr.strides.h); - op.args_.AddInt("padding_y", -attr.padding.prepended.h); - op.args_.AddInt("dilation_y", attr.dilations.h); - op.args_.AddInt("kernel_size_z", attr.weights.shape.d); - op.args_.AddInt("stride_z", attr.strides.d); - op.args_.AddInt("padding_z", -attr.padding.prepended.d); - op.args_.AddInt("dilation_z", attr.dilations.d); - if (!IsSpecializedCase(attr.weights.shape.o)) - { - op.args_.AddInt("ch_multiplier", attr.weights.shape.o); - } - const bool stride_correction = definition.IsBatchSupported() && attr.strides.w != 1; - op.code_ = GenerateDepthwiseConvolutionCode(definition, stride_correction, attr.weights.shape.o, - weights_are_buffer, false, &op); - UploadWeightsForDWConv3D(attr.weights, weights_are_buffer, definition.precision, &op); - op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ; - - TensorLinearDescriptor desc; - desc.storage_type = - weights_are_buffer ? LinearStorageType::BUFFER : LinearStorageType::TEXTURE_2D; - desc.element_type = definition.GetDataType(); - desc.UploadLinearData(attr.bias); - op.args_.AddObject("biases", absl::make_unique<TensorLinearDescriptor>(std::move(desc))); - return op; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv.h deleted file mode 100644 index cbadd9fde..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv.h +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_DEPTHWISE_CONV_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_DEPTHWISE_CONV_H__ - -#include <vector> - -#include "open_cl/Buffer.h" -#include "open_cl/kernels/GpuOperation.h" -#include "open_cl/LinearStorage.h" -#include "open_cl/Tensor.h" -#include "open_cl/Texture2d.h" -#include "open_cl/Util.h" -#include "open_cl/DataType.h" -#include "open_cl/Operations.h" -#include "open_cl/Shape.h" -#include "open_cl/Status.h" -#include "open_cl/Tensor.h" -#include "open_cl/Types.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -template <DataType S, typename T> -void RearrangeWeightsForDWConv2D(const InternalTensor<OHWI, S> &weights, absl::Span<T> dst) -{ - const int dst_channels = weights.shape.i * weights.shape.o; - const int dst_depth = DivideRoundUp(dst_channels, 4); - const int kernel_x = weights.shape.w; - const int kernel_y = weights.shape.h; - - int counter = 0; - for (int d = 0; d < dst_depth; ++d) - { - for (int y = 0; y < kernel_y; ++y) - { - for (int x = 0; x < kernel_x; ++x) - { - T filter_val; - for (int i = 0; i < 4; ++i) - { - const int d_ch = d * 4 + i; - if (d_ch < dst_channels) - { - const int f_index = - weights.shape.LinearIndex({d_ch % weights.shape.o, y, x, d_ch / weights.shape.o}); - filter_val[i] = weights.data[f_index]; - } - else - { - filter_val[i] = 0.0f; - } - } - dst[counter++] = filter_val; - } - } - } -} - -template <DataType T> -void UploadWeightsForDWConv2D(const InternalTensor<OHWI, T> &weights, bool weights_are_buffer, - CalculationsPrecision precision, GPUOperation *op) -{ - const int dst_channels = weights.shape.i * weights.shape.o; - const int dst_slices = DivideRoundUp(dst_channels, 4); - const int kernel_x = weights.shape.w; - const int kernel_y = weights.shape.h; - - const int elements_count = kernel_x * kernel_y * dst_slices; - - const bool fp32_weights = precision == CalculationsPrecision::F32; - const int float4_size = fp32_weights ? 16 : 8; - - std::vector<uint8_t> data(float4_size * elements_count); - - if (fp32_weights) - { - float4 *ptr = reinterpret_cast<float4 *>(data.data()); - RearrangeWeightsForDWConv2D(weights, absl::MakeSpan(ptr, elements_count)); - } - // TODO - // It doesn't support F16 yet. I will try to add it later. - // - // else { - // half4* ptr = reinterpret_cast<half4*>(data.data()); - // RearrangeWeightsForDWConv2D(weights, absl::MakeSpan(ptr, elements_count)); - // } - - if (weights_are_buffer) - { - BufferDescriptor desc; - desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16; - desc.element_size = 4; - desc.size = float4_size * elements_count; - desc.data = std::move(data); - op->args_.AddObject("weights", absl::make_unique<BufferDescriptor>(desc)); - } - else - { - Texture2DDescriptor desc; - desc.element_type = fp32_weights ? 
DataType::FLOAT32 : DataType::FLOAT16; - desc.size = int2(kernel_x * kernel_y, dst_slices); - desc.data = std::move(data); - op->args_.AddObject("weights", absl::make_unique<Texture2DDescriptor>(desc)); - } -} - -template <DataType S, typename T> -void RearrangeWeightsForDWConv3D(const InternalTensor<OHWDI, S> &weights, absl::Span<T> dst) -{ - const int dst_channels = weights.shape.i * weights.shape.o; - const int dst_slices = DivideRoundUp(dst_channels, 4); - const int kernel_x = weights.shape.w; - const int kernel_y = weights.shape.h; - const int kernel_z = weights.shape.d; - - int counter = 0; - for (int d = 0; d < dst_slices; ++d) - { - for (int z = 0; z < kernel_z; ++z) - { - for (int y = 0; y < kernel_y; ++y) - { - for (int x = 0; x < kernel_x; ++x) - { - T filter_val; - for (int i = 0; i < 4; ++i) - { - const int d_ch = d * 4 + i; - if (d_ch < dst_channels) - { - const int f_index = weights.shape.LinearIndex( - {d_ch % weights.shape.o, y, x, z, d_ch / weights.shape.o}); - filter_val[i] = weights.data[f_index]; - } - else - { - filter_val[i] = 0.0f; - } - } - dst[counter++] = filter_val; - } - } - } - } -} - -template <DataType T> -void UploadWeightsForDWConv3D(const InternalTensor<OHWDI, T> &weights, bool weights_are_buffer, - CalculationsPrecision precision, GPUOperation *op) -{ - const int dst_channels = weights.shape.i * weights.shape.o; - const int dst_slices = DivideRoundUp(dst_channels, 4); - const int kernel_x = weights.shape.w; - const int kernel_y = weights.shape.h; - const int kernel_z = weights.shape.d; - - const int elements_count = kernel_x * kernel_y * kernel_z * dst_slices; - - const bool fp32_weights = precision == CalculationsPrecision::F32; - const int float4_size = fp32_weights ? 16 : 8; - - std::vector<uint8_t> data(float4_size * elements_count); - - if (fp32_weights) - { - float4 *ptr = reinterpret_cast<float4 *>(data.data()); - RearrangeWeightsForDWConv3D(weights, absl::MakeSpan(ptr, elements_count)); - } - // TODO - // It doesn't support F16 yet. I will try to add it later. - // - // else { - // half4* ptr = reinterpret_cast<half4*>(data.data()); - // RearrangeWeightsForDWConv3D(weights, absl::MakeSpan(ptr, elements_count)); - // } - - if (weights_are_buffer) - { - BufferDescriptor desc; - desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16; - desc.element_size = 4; - desc.size = float4_size * elements_count; - desc.data = std::move(data); - op->args_.AddObject("weights", absl::make_unique<BufferDescriptor>(std::move(desc))); - } - else - { - Texture2DDescriptor desc; - desc.element_type = fp32_weights ? 
DataType::FLOAT32 : DataType::FLOAT16; - desc.size = int2(kernel_x * kernel_y * kernel_z, dst_slices); - desc.data = std::move(data); - op->args_.AddObject("weights", absl::make_unique<Texture2DDescriptor>(std::move(desc))); - } -} - -GPUOperation CreateDepthwiseConvolution2D(const DeviceInfo &device_info, - const OperationDef &definition, - const DepthwiseConvolution2DAttributes &attr); - -GPUOperation -CreateDepthwiseConvolution2DDynamicWeights(const DeviceInfo &device_info, - const OperationDef &definition, - const DepthwiseConvolution2DAttributes &attr); - -GPUOperation CreateDepthwiseConvolution3D(const DeviceInfo &device_info, - const OperationDef &definition, - const DepthwiseConvolution3DAttributes &attr); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_DEPTHWISE_CONV_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv3x3.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv3x3.cc deleted file mode 100644 index 89a14f14d..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv3x3.cc +++ /dev/null @@ -1,358 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "DepthwiseConv3x3.h" - -#include <string> -#include <utility> - -#include "open_cl/kernels/Util.h" -#include "open_cl/kernels/WorkGroupPicking.h" -#include "open_cl/Precision.h" -#include "open_cl/Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -DepthwiseConv3x3::DepthwiseConv3x3(const OperationDef &definition, bool weights_are_buffer, - bool local_mem_uploads, const DeviceInfo &device_info) - : GPUOperation(definition), local_mem_uploads_(local_mem_uploads) -{ - work_group_size_ = int3(8, 4, 1); - code_ = GenerateDepthwiseConvCode(definition_, weights_are_buffer, local_mem_uploads_); - - if (definition_.precision == CalculationsPrecision::F16 && device_info.IsPowerVR()) - { - compiler_options_.push_back(CompilerOptions::POWERVR_FP16); - } -} - -DepthwiseConv3x3::DepthwiseConv3x3(DepthwiseConv3x3 &&operation) - : GPUOperation(std::move(operation)), local_mem_uploads_(operation.local_mem_uploads_) -{ -} - -DepthwiseConv3x3 &DepthwiseConv3x3::operator=(DepthwiseConv3x3 &&operation) -{ - if (this != &operation) - { - std::swap(local_mem_uploads_, operation.local_mem_uploads_); - GPUOperation::operator=(std::move(operation)); - } - return *this; -} - -std::string DepthwiseConv3x3::GenerateDepthwiseConvCode(const OperationDef &op_def, - bool weights_are_buffer, - bool local_mem_uploads) -{ - auto src_desc = op_def.src_tensors[0]; - src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); - AddSrcTensor("src_tensor", src_desc); - AddDstTensor("dst_tensor", op_def.dst_tensors[0]); - - const auto src_tensor_type = op_def.src_tensors[0].storage_type; - - const bool manual_clamp = src_tensor_type == TensorStorageType::BUFFER || - src_tensor_type == TensorStorageType::IMAGE_BUFFER; - - std::string c = GetCommonDefines(op_def.precision); - if (local_mem_uploads) - { - c += "__attribute__((reqd_work_group_size(8, 4, 1)))\n"; - } - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::BATCH)) - { - c += " int linear_id = get_global_id(0);\n"; - c += " int X = (linear_id / args.dst_tensor.Batch()) * 2;\n"; - c += " int B = linear_id % args.dst_tensor.Batch();\n"; - c += " args.dst_tensor.SetBatchRef(B);\n"; - c += " args.src_tensor.SetBatchRef(B);\n"; - } - else - { - c += " int X = get_global_id(0) * 2;\n"; - } - c += " int Y = get_global_id(1) * 2;\n"; - c += " int S = get_global_id(2);\n"; - c += " ACCUM_FLT4 r0 = (ACCUM_FLT4)(0.0f);\n"; - c += " ACCUM_FLT4 r1 = (ACCUM_FLT4)(0.0f);\n"; - c += " ACCUM_FLT4 r2 = (ACCUM_FLT4)(0.0f);\n"; - c += " ACCUM_FLT4 r3 = (ACCUM_FLT4)(0.0f);\n"; - if (!local_mem_uploads) - { - c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() " - "|| S >= args.dst_tensor.Slices()) { \n"; - c += " return; \n"; - c += " } \n"; - } - if (local_mem_uploads) - { - c += " __local FLT4 f[10];\n"; - c += " event_t e = async_work_group_copy(f, args.weights.GetPtr() + S * " - "10, 10, 0);\n"; - c += " wait_group_events(1, &e);\n"; - } - else if (weights_are_buffer) - { - c += " __global FLT4* f = args.weights.GetPtr() + S * 10;\n"; - } - c += " FLT4 s0;\n"; - c += " FLT4 s1;\n"; - c += " FLT4 s2;\n"; - c += " FLT4 s3;\n"; - std::string W[9] = {"f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8"}; - std::string bias = "bias"; - std::string xc[4] = {"X - 1", "X", "X + 1", "X + 2"}; - std::string yc[4] = {"Y - 1", "Y", "Y + 1", "Y + 2"}; - if (!weights_are_buffer) - { - c += " FLT4 f0 = args.weights.Read(0, S);\n"; - c += " FLT4 f1 = args.weights.Read(1, S);\n"; - c += " FLT4 f2 = 
args.weights.Read(2, S);\n"; - c += " FLT4 f3 = args.weights.Read(3, S);\n"; - c += " FLT4 f4 = args.weights.Read(4, S);\n"; - c += " FLT4 f5 = args.weights.Read(5, S);\n"; - c += " FLT4 f6 = args.weights.Read(6, S);\n"; - c += " FLT4 f7 = args.weights.Read(7, S);\n"; - c += " FLT4 f8 = args.weights.Read(8, S);\n"; - } - if (manual_clamp) - { - c += " int x0 = X - 1;\n"; - c += " int x1 = X;\n"; - c += " int x2 = X + 1;\n"; - c += " int x3 = X + 2;\n"; - c += " int y0 = Y - 1;\n"; - c += " int y1 = Y;\n"; - c += " int y2 = Y + 1;\n"; - c += " int y3 = Y + 2;\n"; - c += " bool x0_in = x0 >= 0 && x0 < args.dst_tensor.Width();\n"; - c += " bool x1_in = x1 >= 0 && x1 < args.dst_tensor.Width();\n"; - c += " bool x2_in = x2 >= 0 && x2 < args.dst_tensor.Width();\n"; - c += " bool x3_in = x3 >= 0 && x3 < args.dst_tensor.Width();\n"; - c += " bool y0_in = y0 >= 0 && y0 < args.dst_tensor.Height();\n"; - c += " bool y1_in = y1 >= 0 && y1 < args.dst_tensor.Height();\n"; - c += " bool y2_in = y2 >= 0 && y2 < args.dst_tensor.Height();\n"; - c += " bool y3_in = y3 >= 0 && y3 < args.dst_tensor.Height();\n"; - c += " x0 = clamp(x0, 0, args.dst_tensor.Width() - 1);\n"; - c += " x1 = clamp(x1, 0, args.dst_tensor.Width() - 1);\n"; - c += " x2 = clamp(x2, 0, args.dst_tensor.Width() - 1);\n"; - c += " x3 = clamp(x3, 0, args.dst_tensor.Width() - 1);\n"; - c += " y0 = clamp(y0, 0, args.dst_tensor.Height() - 1);\n"; - c += " y1 = clamp(y1, 0, args.dst_tensor.Height() - 1);\n"; - c += " y2 = clamp(y2, 0, args.dst_tensor.Height() - 1);\n"; - c += " y3 = clamp(y3, 0, args.dst_tensor.Height() - 1);\n"; - if (src_tensor_type == TensorStorageType::BUFFER) - { - c += " __global FLT4* src_loc = " - "args.src_tensor.GetPtrWithSliceOffset(S);\n"; - } - xc[0] = "x0"; - xc[1] = "x1"; - xc[2] = "x2"; - xc[3] = "x3"; - yc[0] = "y0"; - yc[1] = "y1"; - yc[2] = "y2"; - yc[3] = "y3"; - } - if (local_mem_uploads || weights_are_buffer) - { - W[0] = "f[0]"; - W[1] = "f[1]"; - W[2] = "f[2]"; - W[3] = "f[3]"; - W[4] = "f[4]"; - W[5] = "f[5]"; - W[6] = "f[6]"; - W[7] = "f[7]"; - W[8] = "f[8]"; - bias = "f[9]"; - } - auto read_4x_line = [&](int y) { - if (src_tensor_type == TensorStorageType::BUFFER) - { - const std::string y_in = "y" + std::to_string(y) + "_in"; - c += " s0 = src_loc[args.src_tensor.GetWHOffset(" + xc[0] + ", " + yc[y] + - ")] * (FLT)(x0_in && " + y_in + ");\n"; - c += " s1 = src_loc[args.src_tensor.GetWHOffset(" + xc[1] + ", " + yc[y] + - ")] * (FLT)(x1_in && " + y_in + ");\n"; - c += " s2 = src_loc[args.src_tensor.GetWHOffset(" + xc[2] + ", " + yc[y] + - ")] * (FLT)(x2_in && " + y_in + ");\n"; - c += " s3 = src_loc[args.src_tensor.GetWHOffset(" + xc[3] + ", " + yc[y] + - ")] * (FLT)(x3_in && " + y_in + ");\n"; - } - else if (src_tensor_type == TensorStorageType::IMAGE_BUFFER) - { - const std::string y_in = "y" + std::to_string(y) + "_in"; - c += " s0 = args.src_tensor.Read(" + xc[0] + ", " + yc[y] + ", S) * (FLT)(x0_in && " + - y_in + ");\n"; - c += " s1 = args.src_tensor.Read(" + xc[1] + ", " + yc[y] + ", S) * (FLT)(x1_in && " + - y_in + ");\n"; - c += " s2 = args.src_tensor.Read(" + xc[2] + ", " + yc[y] + ", S) * (FLT)(x2_in && " + - y_in + ");\n"; - c += " s3 = args.src_tensor.Read(" + xc[3] + ", " + yc[y] + ", S) * (FLT)(x3_in && " + - y_in + ");\n"; - } - else - { - c += " s0 = args.src_tensor.Read(" + xc[0] + ", " + yc[y] + ", S);\n"; - c += " s1 = args.src_tensor.Read(" + xc[1] + ", " + yc[y] + ", S);\n"; - c += " s2 = args.src_tensor.Read(" + xc[2] + ", " + yc[y] + ", S);\n"; - c += " s3 = 
args.src_tensor.Read(" + xc[3] + ", " + yc[y] + ", S);\n"; - } - }; - c += " {\n"; - read_4x_line(0); - c += " r0 += TO_ACCUM_TYPE(" + W[0] + " * s0);\n"; - c += " r0 += TO_ACCUM_TYPE(" + W[1] + " * s1);\n"; - c += " r1 += TO_ACCUM_TYPE(" + W[0] + " * s1);\n"; - c += " r0 += TO_ACCUM_TYPE(" + W[2] + " * s2);\n"; - c += " r1 += TO_ACCUM_TYPE(" + W[1] + " * s2);\n"; - c += " r1 += TO_ACCUM_TYPE(" + W[2] + " * s3);\n"; - c += " }\n"; - c += " {\n"; - read_4x_line(1); - c += " r0 += TO_ACCUM_TYPE(" + W[3] + " * s0);\n"; - c += " r2 += TO_ACCUM_TYPE(" + W[0] + " * s0);\n"; - c += " r0 += TO_ACCUM_TYPE(" + W[4] + " * s1);\n"; - c += " r1 += TO_ACCUM_TYPE(" + W[3] + " * s1);\n"; - c += " r2 += TO_ACCUM_TYPE(" + W[1] + " * s1);\n"; - c += " r3 += TO_ACCUM_TYPE(" + W[0] + " * s1);\n"; - c += " r0 += TO_ACCUM_TYPE(" + W[5] + " * s2);\n"; - c += " r1 += TO_ACCUM_TYPE(" + W[4] + " * s2);\n"; - c += " r2 += TO_ACCUM_TYPE(" + W[2] + " * s2);\n"; - c += " r3 += TO_ACCUM_TYPE(" + W[1] + " * s2);\n"; - c += " r1 += TO_ACCUM_TYPE(" + W[5] + " * s3);\n"; - c += " r3 += TO_ACCUM_TYPE(" + W[2] + " * s3);\n"; - c += " }\n"; - c += " {\n"; - read_4x_line(2); - c += " r0 += TO_ACCUM_TYPE(" + W[6] + " * s0);\n"; - c += " r2 += TO_ACCUM_TYPE(" + W[3] + " * s0);\n"; - c += " r0 += TO_ACCUM_TYPE(" + W[7] + " * s1);\n"; - c += " r1 += TO_ACCUM_TYPE(" + W[6] + " * s1);\n"; - c += " r2 += TO_ACCUM_TYPE(" + W[4] + " * s1);\n"; - c += " r3 += TO_ACCUM_TYPE(" + W[3] + " * s1);\n"; - c += " r0 += TO_ACCUM_TYPE(" + W[8] + " * s2);\n"; - c += " r1 += TO_ACCUM_TYPE(" + W[7] + " * s2);\n"; - c += " r2 += TO_ACCUM_TYPE(" + W[5] + " * s2);\n"; - c += " r3 += TO_ACCUM_TYPE(" + W[4] + " * s2);\n"; - c += " r1 += TO_ACCUM_TYPE(" + W[8] + " * s3);\n"; - c += " r3 += TO_ACCUM_TYPE(" + W[5] + " * s3);\n"; - c += " }\n"; - c += " {\n"; - read_4x_line(3); - c += " r2 += TO_ACCUM_TYPE(" + W[6] + " * s0);\n"; - c += " r2 += TO_ACCUM_TYPE(" + W[7] + " * s1);\n"; - c += " r3 += TO_ACCUM_TYPE(" + W[6] + " * s1);\n"; - c += " r2 += TO_ACCUM_TYPE(" + W[8] + " * s2);\n"; - c += " r3 += TO_ACCUM_TYPE(" + W[7] + " * s2);\n"; - c += " r3 += TO_ACCUM_TYPE(" + W[8] + " * s3);\n"; - c += " }\n"; - if (!weights_are_buffer) - { - c += " FLT4 bias = args.weights.Read(9, S);\n"; - } - c += " r0 += TO_ACCUM_TYPE(" + bias + ");\n"; - c += " r1 += TO_ACCUM_TYPE(" + bias + ");\n"; - c += " r2 += TO_ACCUM_TYPE(" + bias + ");\n"; - c += " r3 += TO_ACCUM_TYPE(" + bias + ");\n"; - if (local_mem_uploads) - { - c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() " - "|| S >= args.dst_tensor.Slices()) { \n"; - c += " return; \n"; - c += " } \n"; - } - c += " if(X + 0 < args.dst_tensor.Width() && Y + 0 < " - "args.dst_tensor.Height()) {\n"; - c += " FLT4 result = TO_FLT4(r0);\n"; - c += " args.dst_tensor.Write(result, X + 0, Y + 0, S)\n"; - c += " }\n"; - c += " if(X + 1 < args.dst_tensor.Width() && Y + 0 < " - "args.dst_tensor.Height()) {\n"; - c += " FLT4 result = TO_FLT4(r1);\n"; - c += " args.dst_tensor.Write(result, X + 1, Y + 0, S)\n"; - c += " }\n"; - c += " if(X + 0 < args.dst_tensor.Width() && Y + 1 < " - "args.dst_tensor.Height()) {\n"; - c += " FLT4 result = TO_FLT4(r2);\n"; - c += " args.dst_tensor.Write(result, X + 0, Y + 1, S)\n"; - c += " }\n"; - c += " if(X + 1 < args.dst_tensor.Width() && Y + 1 < " - "args.dst_tensor.Height()) {\n"; - c += " FLT4 result = TO_FLT4(r3);\n"; - c += " args.dst_tensor.Write(result, X + 1, Y + 1, S)\n"; - c += " }\n"; - c += "}\n"; - - return c; -} - -int3 DepthwiseConv3x3::GetGridSize() const -{ - 
const int grid_x = DivideRoundUp(dst_[0]->Width(), 2) * dst_[0]->Batch(); - const int grid_y = DivideRoundUp(dst_[0]->Height(), 2); - const int grid_z = dst_[0]->Slices(); - return int3(grid_x, grid_y, grid_z); -} - -void DepthwiseConv3x3::GetPossibleKernelWorkGroups(TuningType tuning_type, - const DeviceInfo &device_info, - const KernelInfo &kernel_info, - std::vector<int3> *work_groups) const -{ - if (local_mem_uploads_) - { - work_groups->push_back(work_group_size_); - } - else - { - GetPossibleWorkGroups(tuning_type, device_info, kernel_info, grid_size_, work_groups); - } -} - -bool IsDepthwiseConv3x3Supported(const DepthwiseConvolution2DAttributes &attr) -{ - return attr.weights.shape.o == 1 && attr.dilations.w == 1 && attr.dilations.h == 1 && - attr.weights.shape.w == 3 && attr.weights.shape.h == 3 && attr.strides.w == 1 && - attr.strides.h == 1 && attr.padding.prepended.w == 1 && attr.padding.prepended.h == 1 && - attr.padding.appended.w == 1 && attr.padding.appended.h == 1; -} - -DepthwiseConv3x3 CreateDepthwiseConv3x3(const DeviceInfo &device_info, - const OperationDef &definition, - const DepthwiseConvolution2DAttributes &attr) -{ - bool weights_are_buffer = device_info.IsPowerVR() || device_info.IsMali(); - bool local_mem_uploads = weights_are_buffer && device_info.IsPowerVR(); - DepthwiseConv3x3 result(definition, weights_are_buffer, local_mem_uploads, device_info); - result.UploadWeightsAndBiases(attr.weights, attr.bias, weights_are_buffer); - return result; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv3x3.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv3x3.h deleted file mode 100644 index 8c571105a..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/DepthwiseConv3x3.h +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_DEPTHWISE_CONV_3X3_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_DEPTHWISE_CONV_3X3_H__ - -#include <memory> -#include <vector> - -#include "open_cl/Buffer.h" -#include "open_cl/kernels/GpuOperation.h" -#include "open_cl/Tensor.h" -#include "open_cl/Texture2d.h" -#include "open_cl/Util.h" -#include "open_cl/DataType.h" -#include "open_cl/Operations.h" -#include "open_cl/Shape.h" -#include "open_cl/Status.h" -#include "open_cl/Tensor.h" -#include "open_cl/Types.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -class DepthwiseConv3x3 : public GPUOperation -{ -public: - DepthwiseConv3x3() = default; - void GetPossibleKernelWorkGroups(TuningType tuning_type, const DeviceInfo &device_info, - const KernelInfo &kernel_info, - std::vector<int3> *work_groups) const override; - int3 GetGridSize() const override; - - // Move only - DepthwiseConv3x3(DepthwiseConv3x3 &&operation); - DepthwiseConv3x3 &operator=(DepthwiseConv3x3 &&operation); - DepthwiseConv3x3(const DepthwiseConv3x3 &) = delete; - DepthwiseConv3x3 &operator=(const DepthwiseConv3x3 &) = delete; - -private: - explicit DepthwiseConv3x3(const OperationDef &definition, bool weights_are_buffer, - bool local_mem_uploads, const DeviceInfo &device_info); - template <DataType T> - void UploadWeightsAndBiases(const InternalTensor<OHWI, T> &weights, - const InternalTensor<Linear, T> &biases, bool weights_are_buffer); - - friend DepthwiseConv3x3 CreateDepthwiseConv3x3(const DeviceInfo &device_info, - const OperationDef &definition, - const DepthwiseConvolution2DAttributes &attr); - - template <DataType S, typename T> - void RearrangeWeightsAndBiasesData(const InternalTensor<OHWI, S> &weights, - const InternalTensor<Linear, S> &biases, absl::Span<T> dst); - - std::string GenerateDepthwiseConvCode(const OperationDef &op_def, bool weights_are_buffer, - bool local_mem_uploads); - - bool local_mem_uploads_; -}; - -template <DataType T> -void DepthwiseConv3x3::UploadWeightsAndBiases(const InternalTensor<OHWI, T> &weights, - const InternalTensor<Linear, T> &biases, - bool weights_are_buffer) -{ - const int src_depth = DivideRoundUp(weights.shape.i, 4); - int texture_width = 10; // 3x3 kernel + 1 bias - int texture_height = src_depth; - const int elements_count = texture_width * texture_height; - const bool fp32_weights = definition_.precision == CalculationsPrecision::F32; - const int float4_size = fp32_weights ? 16 : 8; - - std::vector<uint8_t> data(float4_size * elements_count); - if (fp32_weights) - { - float4 *ptr = reinterpret_cast<float4 *>(data.data()); - RearrangeWeightsAndBiasesData(weights, biases, absl::MakeSpan(ptr, elements_count)); - } - // TODO - // It doesn't support F16 yet. I will try to add it later. - // - // else { - // half4* ptr = reinterpret_cast<half4*>(data.data()); - // RearrangeWeightsAndBiasesData(weights, biases, - // absl::MakeSpan(ptr, elements_count)); - // } - - if (weights_are_buffer) - { - BufferDescriptor desc; - desc.element_type = fp32_weights ? DataType::FLOAT32 : DataType::FLOAT16; - desc.element_size = 4; - desc.size = float4_size * elements_count; - desc.data = std::move(data); - args_.AddObject("weights", absl::make_unique<BufferDescriptor>(std::move(desc))); - } - else - { - Texture2DDescriptor desc; - desc.element_type = fp32_weights ? 
DataType::FLOAT32 : DataType::FLOAT16; - desc.size = int2(texture_width, texture_height); - desc.data = std::move(data); - args_.AddObject("weights", absl::make_unique<Texture2DDescriptor>(std::move(desc))); - } -} - -template <DataType S, typename T> -void DepthwiseConv3x3::RearrangeWeightsAndBiasesData(const InternalTensor<OHWI, S> &weights, - const InternalTensor<Linear, S> &biases, - absl::Span<T> dst) -{ - const int src_depth = DivideRoundUp(weights.shape.i, 4); - - int counter = 0; - for (int s = 0; s < src_depth; ++s) - { - for (int y = 0; y < 3; ++y) - { - for (int x = 0; x < 3; ++x) - { - T filter_val; - for (int i = 0; i < 4; ++i) - { - const int s_ch = s * 4 + i; - if (s_ch < weights.shape.i) - { - const int f_index = weights.shape.LinearIndex({0, y, x, s_ch}); - filter_val[i] = weights.data[f_index]; - } - else - { - filter_val[i] = 0.0f; - } - } - dst[counter++] = filter_val; - } - } - - T bias_val; - for (int i = 0; i < 4; ++i) - { - const int dst_ch = s * 4 + i; - bias_val[i] = dst_ch >= biases.shape.v ? 0.0f : biases.data[dst_ch]; - } - dst[counter++] = bias_val; - } -} - -bool IsDepthwiseConv3x3Supported(const DepthwiseConvolution2DAttributes &attr); - -DepthwiseConv3x3 CreateDepthwiseConv3x3(const DeviceInfo &device_info, - const OperationDef &definition, - const DepthwiseConvolution2DAttributes &attr); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_DEPTHWISE_CONV_3X3_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/GpuOperation.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/GpuOperation.cc deleted file mode 100644 index 8839d9687..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/GpuOperation.cc +++ /dev/null @@ -1,385 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "GpuOperation.h" - -#include "Util.h" -#include "WorkGroupPicking.h" -#include "open_cl/AccessType.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -std::string GetElementWiseCode(const OperationDef &op_def, bool check_src_slices) -{ - std::string c = GetCommonDefines(op_def.precision); - - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - c += " int X = get_global_id(0);\n"; - c += " int Y = get_global_id(1);\n"; - c += " int Z = get_global_id(2);\n"; - c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || " - "Z >= args.dst_tensor.Slices()) return; \n"; - if (check_src_slices) - { - c += " FLT4 src = (FLT4)(0.0f);\n"; - c += " if (Z < args.src_tensor.Slices()) {\n"; - c += " src = args.src_tensor.Read(X, Y, Z);\n"; - c += " }\n"; - } - else - { - c += " FLT4 src = args.src_tensor.Read(X, Y, Z);\n"; - } - c += " args.dst_tensor.Write(src, X, Y, Z);\n"; - c += "} \n"; - return c; -} - -int3 GetWorkGroupsCount(int grid_dimension, const int3 &grid_size, const int3 &work_group_size, - const int3 &work_group_launch_order) -{ - int3 work_groups_count; - if (grid_dimension == 1) - { - work_groups_count.x = DivideRoundUp(grid_size.x, work_group_size.x); - work_groups_count.y = 1; - work_groups_count.z = 1; - } - else if (grid_dimension == 2) - { - int3 wgs; - wgs.x = DivideRoundUp(grid_size.x, work_group_size.x); - wgs.y = DivideRoundUp(grid_size.y, work_group_size.y); - work_groups_count.x = wgs[work_group_launch_order[0]]; - work_groups_count.y = wgs[work_group_launch_order[1]]; - work_groups_count.z = 1; - } - else - { // grid_dimension == 3 - int3 wgs; - wgs.x = DivideRoundUp(grid_size.x, work_group_size.x); - wgs.y = DivideRoundUp(grid_size.y, work_group_size.y); - wgs.z = DivideRoundUp(grid_size.z, work_group_size.z); - work_groups_count.x = wgs[work_group_launch_order[0]]; - work_groups_count.y = wgs[work_group_launch_order[1]]; - work_groups_count.z = wgs[work_group_launch_order[2]]; - } - return work_groups_count; -} - -} // namespace - -DataType OperationDef::GetDataType() const { return DeduceDataTypeFromPrecision(precision); } - -DataType OperationDef::GetPrimaryDataType() const { return src_tensors[0].data_type; } -TensorStorageType OperationDef::GetPrimaryStorageType() const -{ - return src_tensors[0].storage_type; -} - -bool OperationDef::IsBatchSupported() const -{ - for (const auto &src : src_tensors) - { - if (HasAxis(src.layout, Axis::BATCH)) - { - return true; - } - } - for (const auto &dst : dst_tensors) - { - if (HasAxis(dst.layout, Axis::BATCH)) - { - return true; - } - } - return false; -} - -GPUOperation::GPUOperation(const OperationDef &definition) : definition_(definition) {} - -void GPUOperation::SetSrc(Tensor *ptr, int index) -{ - if (index >= (int)src_.size()) - { - src_.resize(index + 1, nullptr); - } - src_[index] = ptr; -} - -void GPUOperation::SetDst(Tensor *ptr, int index) -{ - if (index >= (int)dst_.size()) - { - dst_.resize(index + 1, nullptr); - } - dst_[index] = ptr; -} - -GPUOperation::GPUOperation(GPUOperation &&operation) - : args_(std::move(operation.args_)), code_(std::move(operation.code_)), - work_group_size_(operation.work_group_size_), - compiler_options_(std::move(operation.compiler_options_)), - tensor_to_grid_(operation.tensor_to_grid_), elementwise_(operation.elementwise_), - linkable_(operation.linkable_), check_src_channels_size_(operation.check_src_channels_size_), - definition_(std::move(operation.definition_)), src_(std::move(operation.src_)), - 
dst_(std::move(operation.dst_)), kernel_(std::move(operation.kernel_)), - grid_dimension_(operation.grid_dimension_), - work_group_launch_order_(operation.work_group_launch_order_), grid_size_(operation.grid_size_), - src_tensors_names_(std::move(operation.src_tensors_names_)), - dst_tensors_names_(std::move(operation.dst_tensors_names_)), - work_groups_count_(operation.work_groups_count_), linkable_count_(operation.linkable_count_), - elementwise_code_(std::move(operation.elementwise_code_)) -{ -} - -GPUOperation &GPUOperation::operator=(GPUOperation &&operation) -{ - if (this != &operation) - { - args_ = std::move(operation.args_); - code_ = std::move(operation.code_); - std::swap(work_group_size_, operation.work_group_size_); - compiler_options_ = std::move(operation.compiler_options_); - tensor_to_grid_ = operation.tensor_to_grid_; - elementwise_ = operation.elementwise_; - linkable_ = operation.linkable_; - check_src_channels_size_ = operation.check_src_channels_size_; - definition_ = std::move(operation.definition_); - src_ = std::move(operation.src_); - dst_ = std::move(operation.dst_); - kernel_ = std::move(operation.kernel_); - std::swap(grid_dimension_, operation.grid_dimension_); - std::swap(work_group_launch_order_, operation.work_group_launch_order_); - std::swap(grid_size_, operation.grid_size_); - src_tensors_names_ = std::move(operation.src_tensors_names_); - dst_tensors_names_ = std::move(operation.dst_tensors_names_); - std::swap(work_groups_count_, operation.work_groups_count_); - std::swap(linkable_count_, operation.linkable_count_); - elementwise_code_ = std::move(operation.elementwise_code_); - } - return *this; -} - -absl::Status GPUOperation::AddOperation(GPUOperation *operation) -{ - linkable_count_ += 1; - std::string code = operation->code_; - std::string unique_postfix = absl::StrCat("_link", linkable_count_); - operation->args_.RenameArgs(unique_postfix, &code); - elementwise_code_ += "{\n" + code + "\n}\n"; - RETURN_IF_ERROR(args_.Merge(std::move(operation->args_), unique_postfix)); - for (size_t i = 0; i < operation->src_tensors_names_.size(); ++i) - { - definition_.src_tensors.push_back(operation->definition_.src_tensors[i + 1]); - src_tensors_names_.push_back(operation->src_tensors_names_[i] + unique_postfix); - } - for (size_t i = 0; i < operation->dst_tensors_names_.size(); ++i) - { - dst_tensors_names_.push_back(operation->dst_tensors_names_[i] + unique_postfix); - } - return absl::OkStatus(); -} - -void GPUOperation::AddSrcTensor(const std::string &tensor_name, const TensorDescriptor &desc) -{ - src_tensors_names_.push_back(tensor_name); - auto desc_new = std::make_unique<TensorDescriptor>(desc); - args_.AddObjectRef(tensor_name, AccessType::READ, std::move(desc_new)); -} - -void GPUOperation::AddSrcBuffer(const std::string &buffer_name, const BufferDescriptor &desc) -{ - src_tensors_names_.push_back(buffer_name); - auto desc_new = std::make_unique<BufferDescriptor>(desc); - args_.AddObjectRef(buffer_name, AccessType::READ, std::move(desc_new)); -} - -void GPUOperation::AddDstTensor(const std::string &tensor_name, const TensorDescriptor &desc) -{ - dst_tensors_names_.push_back(tensor_name); - auto desc_new = std::make_unique<TensorDescriptor>(desc); - args_.AddObjectRef(tensor_name, AccessType::WRITE, std::move(desc_new)); -} - -absl::Status GPUOperation::UpdateParams() -{ - for (size_t i = 0; i < src_tensors_names_.size(); ++i) - { - RETURN_IF_ERROR(args_.SetObjectRef(src_tensors_names_[i], src_[i])); - } - for (size_t i = 0; i < 
dst_tensors_names_.size(); ++i)
-  {
-    RETURN_IF_ERROR(args_.SetObjectRef(dst_tensors_names_[i], dst_[i]));
-  }
-  RETURN_IF_ERROR(BindArguments(&args_));
-  grid_size_ = GetGridSize();
-  work_groups_count_ =
-    GetWorkGroupsCount(grid_dimension_, grid_size_, work_group_size_, work_group_launch_order_);
-  return absl::OkStatus();
-}
-
-absl::Status GPUOperation::AssembleCode(const DeviceInfo &device_info, CLContext *context)
-{
-  if (elementwise_)
-  {
-    auto src_desc = absl::make_unique<TensorDescriptor>(definition_.src_tensors[0]);
-    if (definition_.IsBatchSupported())
-    {
-      src_desc->SetStateVar("BatchedWidth", "true");
-    }
-    src_tensors_names_.insert(src_tensors_names_.begin(), "src_tensor");
-    args_.AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc));
-
-    auto dst_desc = absl::make_unique<TensorDescriptor>(definition_.dst_tensors[0]);
-    if (definition_.IsBatchSupported())
-    {
-      dst_desc->SetStateVar("BatchedWidth", "true");
-    }
-    dst_tensors_names_.insert(dst_tensors_names_.begin(), "dst_tensor");
-    args_.AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc));
-
-    elementwise_code_ = "{\n" + code_ + "\n}\n" + elementwise_code_;
-    code_ = GetElementWiseCode(definition_, check_src_channels_size_);
-    RETURN_IF_ERROR(args_.AllocateObjects(context));
-    RETURN_IF_ERROR(
-      args_.TransformToCLCode(device_info, {{dst_tensors_names_[0], elementwise_code_}}, &code_));
-  }
-  else
-  {
-    RETURN_IF_ERROR(args_.AllocateObjects(context));
-    RETURN_IF_ERROR(
-      args_.TransformToCLCode(device_info, {{dst_tensors_names_[0], elementwise_code_}}, &code_));
-  }
-  return absl::OkStatus();
-}
-
-absl::Status GPUOperation::Compile(const CreationContext &creation_context)
-{
-  RETURN_IF_ERROR(AssembleCode(creation_context.GetDeviceInfo(), creation_context.context));
-  RETURN_IF_ERROR(creation_context.cache->GetOrCreateCLKernel(
-    code_, "main_function", compiler_options_, *creation_context.context, *creation_context.device,
-    &kernel_));
-  return PostCompileCheck(creation_context.device->info_, kernel_.info_);
-}
-
-absl::Status GPUOperation::CompileDeserialized(const CreationContext &creation_context)
-{
-  return creation_context.cache->GetOrCreateCLKernel(code_, "main_function", compiler_options_,
-                                                     *creation_context.context,
-                                                     *creation_context.device, &kernel_);
-}
-
-void GPUOperation::GetPossibleKernelWorkGroups(TuningType tuning_type,
-                                               const DeviceInfo &device_info,
-                                               const KernelInfo &kernel_info,
-                                               std::vector<int3> *work_groups) const
-{
-  GetPossibleWorkGroups(tuning_type, device_info, kernel_info, grid_size_, work_groups);
-}
-
-absl::Status GPUOperation::Tune(const TuningParameters &params)
-{
-  std::vector<int3> possible_work_groups;
-  GetPossibleKernelWorkGroups(params.tuning_type, *params.info, kernel_.info_,
-                              &possible_work_groups);
-  if (possible_work_groups.empty())
-  {
-    return absl::NotFoundError("Cannot find a work_group size to launch the kernel");
-  }
-  if (possible_work_groups.size() == 1)
-  {
-    work_group_size_ = possible_work_groups[0];
-    work_groups_count_ =
-      GetWorkGroupsCount(grid_dimension_, grid_size_, work_group_size_, work_group_launch_order_);
-    return absl::OkStatus();
-  }
-  else
-  {
-    std::vector<int3> work_groups_count(possible_work_groups.size());
-    for (size_t i = 0; i < work_groups_count.size(); ++i)
-    {
-      work_groups_count[i] = GetWorkGroupsCount(grid_dimension_, grid_size_,
-                                                possible_work_groups[i], work_group_launch_order_);
-    }
-    RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
-    int best_work_group_index;
-
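    // args_ must already be bound to the kernel (done just above) before the
    // trial dispatches; GetBestWorkGroupIndex() presumably times the kernel
    // once per candidate work-group size and returns the index of the fastest.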
RETURN_IF_ERROR(params.queue->GetBestWorkGroupIndex( - kernel_, *params.info, work_groups_count, possible_work_groups, &best_work_group_index)); - work_group_size_ = possible_work_groups[best_work_group_index]; - work_groups_count_ = - GetWorkGroupsCount(grid_dimension_, grid_size_, work_group_size_, work_group_launch_order_); - return absl::OkStatus(); - } -} - -int3 GPUOperation::GetGridSize() const -{ - if (elementwise_ || tensor_to_grid_ == TensorToGrid::kWBToX_HDToY_SToZ) - { - const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); - const int grid_y = dst_[0]->Height() * dst_[0]->Depth(); - const int grid_z = dst_[0]->Slices(); - return int3(grid_x, grid_y, grid_z); - } - if (tensor_to_grid_ == TensorToGrid::kWBToX_HDToY_ZIs1) - { - const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); - const int grid_y = dst_[0]->Height() * dst_[0]->Depth(); - const int grid_z = 1; - return int3(grid_x, grid_y, grid_z); - } - if (tensor_to_grid_ == TensorToGrid::kWBToX_HToY_DToZ) - { - const int grid_x = dst_[0]->Width() * dst_[0]->Batch(); - const int grid_y = dst_[0]->Height(); - const int grid_z = dst_[0]->Depth(); - return int3(grid_x, grid_y, grid_z); - } - if (tensor_to_grid_ == TensorToGrid::kBToX_YIs1_ZIs1) - { - const int grid_x = dst_[0]->Batch(); - const int grid_y = 1; - const int grid_z = 1; - return int3(grid_x, grid_y, grid_z); - } - return grid_size_; -} - -void GPUOperation::AddUniquePostfix(const std::string &unique_postfix) -{ - for (uint32_t i = 0; i < src_tensors_names_.size(); ++i) - { - src_tensors_names_[i] += unique_postfix; - } - for (uint32_t i = 0; i < dst_tensors_names_.size(); ++i) - { - dst_tensors_names_[i] += unique_postfix; - } -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/GpuOperation.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/GpuOperation.h deleted file mode 100644 index 4f531c629..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/GpuOperation.h +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_GPU_OPERATION_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_GPU_OPERATION_H__
-
-#include <string>
-#include <vector>
-
-#include "TuningParameters.h"
-
-#include "open_cl/Arguments.h"
-#include "open_cl/Buffer.h"
-#include "open_cl/ClCommandQueue.h"
-#include "open_cl/ClContext.h"
-#include "open_cl/ClDevice.h"
-#include "open_cl/ClKernel.h"
-#include "open_cl/ClProgram.h"
-#include "open_cl/DataType.h"
-#include "open_cl/DeviceInfo.h"
-#include "open_cl/Precision.h"
-#include "open_cl/ProgramCache.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/TensorType.h"
-#include "open_cl/Types.h"
-#include "open_cl/Status.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-// kCustom: default value
-//   GPUOperation::GetGridSize must be overridden
-// kWBToX_HDToY_SToZ:
-//   grid_x = dst_[0]->Width() * dst_[0]->Batch();
-//   grid_y = dst_[0]->Height() * dst_[0]->Depth();
-//   grid_z = dst_[0]->Slices();
-// kWBToX_HDToY_ZIs1:
-//   grid_x = dst_[0]->Width() * dst_[0]->Batch();
-//   grid_y = dst_[0]->Height() * dst_[0]->Depth();
-//   grid_z = 1;
-// kWBToX_HToY_DToZ:
-//   grid_x = dst_[0]->Width() * dst_[0]->Batch();
-//   grid_y = dst_[0]->Height();
-//   grid_z = dst_[0]->Depth();
-// kBToX_YIs1_ZIs1:
-//   grid_x = dst_[0]->Batch();
-//   grid_y = 1;
-//   grid_z = 1;
-enum class TensorToGrid
-{
-  kCustom,
-  kWBToX_HDToY_SToZ,
-  kWBToX_HDToY_ZIs1,
-  kWBToX_HToY_DToZ,
-  kBToX_YIs1_ZIs1
-};
-
-struct CreationContext
-{
-  const CLDevice *device;
-  CLContext *context;
-  CLCommandQueue *queue;
-  ProgramCache *cache;
-
-  const DeviceInfo &GetDeviceInfo() const { return device->info_; }
-};
-
-struct OperationDef
-{
-  CalculationsPrecision precision;
-  std::vector<TensorDescriptor> src_tensors;
-  std::vector<TensorDescriptor> dst_tensors;
-
-  // Returns FLOAT32 for F32 precision and FLOAT16 for F16 precision.
-  DataType GetDataType() const;
-  // "Primary" means the first src tensor: the first tensor usually defines
-  // the structure of the kernel and the types of all other resources
-  // (biases and the like).
-  DataType GetPrimaryDataType() const;
-  TensorStorageType GetPrimaryStorageType() const;
-  bool IsBatchSupported() const;
-};
-
-// GPUOperation represents an implementation of a neural-network operation on
-// the GPU. A GPUOperation can absorb other GPU operations when elementwise_
-// is set; the combined GPUOperation then replaces the whole sequence
-// Op + op0 + op1 + ...
-// The intended usage is therefore: create a GPUOperation, create all the
-// GPUOperations that will (probably) be attached to it, attach them with
-// AddOperation(), and call Compile() on the combined operation only. Do not
-// call Compile() on an operation that has been attached; it is useless and
-// may be an error.
-class GPUOperation
-{
-public:
-  GPUOperation() = default;
-  explicit GPUOperation(const OperationDef &definition);
-  virtual ~GPUOperation() = default;
-  // Move only
-  GPUOperation(GPUOperation &&operation);
-  GPUOperation &operator=(GPUOperation &&operation);
-  GPUOperation(const GPUOperation &) = delete;
-  GPUOperation &operator=(const GPUOperation &) = delete;
-
-  absl::Status AddOperation(GPUOperation *operation);
-
-  void SetSrc(Tensor *ptr, int index = 0);
-  void SetDst(Tensor *ptr, int index = 0);
-
-  // Should be called after the inputs/outputs change.
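  // A typical call sequence, sketched from this header (the factory call and
  // tensor variables are illustrative only, not prescribed by this class):
  //   GPUOperation op = CreateDepthwiseConvolution2D(device_info, def, attr);
  //   RETURN_IF_ERROR(op.Compile(creation_context));
  //   op.SetSrc(&src_tensor);
  //   op.SetDst(&dst_tensor);
  //   RETURN_IF_ERROR(op.UpdateParams()); // rebind tensors, recompute grid
  //   RETURN_IF_ERROR(op.AddToQueue(queue));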
-  absl::Status UpdateParams();
-
-  absl::Status AddToQueue(CLCommandQueue *queue)
-  {
-    RETURN_IF_ERROR(args_.Bind(kernel_.kernel()));
-    return queue->Dispatch(kernel_, work_groups_count_, work_group_size_);
-  }
-
-  virtual void GetPossibleKernelWorkGroups(TuningType tuning_type, const DeviceInfo &device_info,
-                                           const KernelInfo &kernel_info,
-                                           std::vector<int3> *work_groups) const;
-
-  absl::Status Tune(const TuningParameters &params);
-
-  absl::Status AssembleCode(const DeviceInfo &device_info, CLContext *context);
-
-  absl::Status Compile(const CreationContext &creation_context);
-
-  absl::Status CompileDeserialized(const CreationContext &creation_context);
-
-  virtual absl::Status PostCompileCheck(const DeviceInfo &, const KernelInfo &)
-  {
-    return absl::OkStatus();
-  }
-
-  const OperationDef &GetDefinition() const { return definition_; }
-
-  void AddSrcTensor(const std::string &tensor_name, const TensorDescriptor &desc);
-  void AddSrcBuffer(const std::string &buffer_name, const BufferDescriptor &desc);
-  void AddDstTensor(const std::string &tensor_name, const TensorDescriptor &desc);
-
-  bool IsLinkable() const { return elementwise_ && linkable_; }
-
-  // for linking
-  void AddUniquePostfix(const std::string &unique_postfix);
-
-  Arguments args_;
-  std::string code_;
-  int3 work_group_size_ = int3(8, 4, 1);
-  std::vector<CompilerOptions> compiler_options_;
-  // not applicable to elementwise
-  TensorToGrid tensor_to_grid_ = TensorToGrid::kCustom;
-
-  bool elementwise_ = false;
-  // applicable only with elementwise_ = true;
-  bool linkable_ = true; // by default every elementwise is linkable
-  // applicable only with elementwise_ = true;
-  bool check_src_channels_size_ = false;
-
-protected:
-  virtual absl::Status BindArguments(ArgumentsBinder *) { return absl::OkStatus(); }
-  virtual int3 GetGridSize() const;
-
-  // Defines operation calculation precision and format of src/dst tensors.
-  OperationDef definition_;
-  std::vector<Tensor *> src_;
-  std::vector<Tensor *> dst_;
-  CLKernel kernel_;
-  int grid_dimension_ = 3; // can be 1, 2 or 3
-  int3 work_group_launch_order_ = int3(0, 1, 2);
-  int3 grid_size_ = int3(0, 0, 0);
-  std::vector<std::string> src_tensors_names_;
-  std::vector<std::string> dst_tensors_names_;
-
-private:
-  int3 work_groups_count_ = int3(0, 0, 0);
-  int linkable_count_ = 0;
-  std::string elementwise_code_; // temporary, used during op construction
-};
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_GPU_OPERATION_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Pooling.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Pooling.cc
deleted file mode 100644
index ceeab2f39..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Pooling.cc
+++ /dev/null
@@ -1,400 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -#include "Pooling.h" - -#include <string> - -#include "Util.h" -#include "open_cl/Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -std::string GetAveragePoolingKernelCode(const OperationDef &op_def, bool stride_correction, - GPUOperation *op) -{ - auto src_desc = op_def.src_tensors[0]; - - src_desc.SetTextureAddressMode(TextureAddressMode::ZERO); - - if (op_def.IsBatchSupported()) - { - src_desc.SetStateVar("BatchedWidth", "true"); - } - op->AddSrcTensor("src_tensor", src_desc); - auto dst_desc = op_def.dst_tensors[0]; - if (op_def.IsBatchSupported()) - { - dst_desc.SetStateVar("BatchedWidth", "true"); - } - op->AddDstTensor("dst_tensor", dst_desc); - - std::map<Axis, std::string> axis_to_src_coord = { - {Axis::WIDTH, "x_c"}, {Axis::HEIGHT, "y_c"}, {Axis::DEPTH, "d_c"}, - {Axis::CHANNELS, "Z"}, {Axis::BATCH, "B"}, - }; - - std::map<Axis, std::string> axis_to_dst_coord = { - {Axis::WIDTH, "X"}, {Axis::HEIGHT, "Y"}, {Axis::DEPTH, "D"}, - {Axis::CHANNELS, "Z"}, {Axis::BATCH, "B"}, - }; - - std::vector<std::string> src_coords; - std::vector<std::string> dst_coords; - for (auto axis : {Axis::WIDTH, Axis::HEIGHT, Axis::DEPTH, Axis::CHANNELS}) - { - if (op_def.dst_tensors[0].HasAxis(axis)) - { - dst_coords.push_back(axis_to_dst_coord[axis]); - } - if (op_def.src_tensors[0].HasAxis(axis)) - { - src_coords.push_back(axis_to_src_coord[axis]); - } - } - std::string src_coord = src_coords[0]; - for (size_t i = 1; i < src_coords.size(); ++i) - { - src_coord += ", " + src_coords[i]; - } - std::string dst_coord = dst_coords[0]; - for (size_t i = 1; i < dst_coords.size(); ++i) - { - dst_coord += ", " + dst_coords[i]; - } - - const bool manual_clamp = op_def.src_tensors[0].storage_type == TensorStorageType::BUFFER || - op_def.src_tensors[0].storage_type == TensorStorageType::IMAGE_BUFFER; - - std::string c = GetCommonDefines(op_def.precision); - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - c += " int X = get_global_id(0);\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) - { - c += " int linear_id_1 = get_global_id(1);\n"; - c += " int Y = linear_id_1 / args.dst_tensor.Depth();\n"; - c += " int D = linear_id_1 % args.dst_tensor.Depth();\n"; - } - else - { - c += " int Y = get_global_id(1);\n"; - } - c += " int Z = get_global_id(2);\n"; - c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || " - "Z >= args.dst_tensor.Slices()) { \n"; - c += " return; \n"; - c += " } \n"; - c += " float4 r = (float4)(0.0f);\n"; - c += " float window_size = 0.0;\n"; - if (stride_correction) - { - c += " int xs = " + - GetXStrideCorrectedV2("X", "args.src_tensor.Batch()", "args.stride_x", "args.padding_x") + - ";\n"; - } - else - { - if (op_def.IsBatchSupported()) - { - c += " int xs = X * args.stride_x + args.padding_x * " - "args.src_tensor.Batch();\n"; - } - else - { - c += " int xs = X * args.stride_x + args.padding_x;\n"; - } - } - c += " int ys = Y * args.stride_y + args.padding_y;\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) - { - c += " int ds = D * args.stride_z + args.padding_z;\n"; - c += " for (int kz = 0; kz < args.kernel_size_z; ++kz) {\n"; - c += " int d_c = ds + kz;\n"; - c += " if (d_c < 0 || d_c >= args.src_tensor.Depth()) continue;\n"; - } - c += " for (int ky = 0; ky < args.kernel_size_y; ++ky) {\n"; - c += " int y_c = ys + ky;\n"; - c += " bool outside_y = y_c < 0 || y_c >= args.src_tensor.Height();\n"; - c += " for (int kx = 0; kx < args.kernel_size_x; ++kx) {\n"; - if (op_def.IsBatchSupported()) - { - c += 
" int x_c = xs + kx * args.src_tensor.Batch();\n"; - } - else - { - c += " int x_c = xs + kx;\n"; - } - c += " bool outside = outside_y || x_c < 0 || x_c >= " - "args.src_tensor.Width();\n"; - if (manual_clamp) - { - c += " r += !outside ? args.src_tensor.Read<float>(" + src_coord + - ") : " - "(float4)(0.0f);\n"; - } - else - { - c += " r += args.src_tensor.Read<float>(" + src_coord + ");\n"; - } - c += " window_size += !outside ? 1.0 : 0.0;\n"; - c += " }\n"; - c += " }\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) - { - c += " } // Depth\n"; - } - // If window_size==0, window covered nothing. This situation is a sign of - // incorrectly constructed operation. NaNs are expected as output. - c += " FLT4 result = TO_FLT4(r / window_size);\n"; - c += " args.dst_tensor.Write(result, " + dst_coord + ");\n"; - c += "}\n"; - - return c; -} - -std::string GetMaxPoolingKernelCode(const OperationDef &op_def, bool stride_correction, - bool output_indices, GPUOperation *op) -{ - auto src_desc = op_def.src_tensors[0]; - if (op_def.IsBatchSupported()) - { - src_desc.SetStateVar("BatchedWidth", "true"); - } - op->AddSrcTensor("src_tensor", src_desc); - auto dst_desc = op_def.dst_tensors[0]; - if (op_def.IsBatchSupported()) - { - dst_desc.SetStateVar("BatchedWidth", "true"); - } - op->AddDstTensor("dst_tensor", dst_desc); - if (output_indices) - { - auto dst_ind_desc = op_def.dst_tensors[1]; - if (op_def.IsBatchSupported()) - { - dst_ind_desc.SetStateVar("BatchedWidth", "true"); - } - op->AddDstTensor("dst_indices", dst_ind_desc); - } - - std::map<Axis, std::string> axis_to_src_coord = { - {Axis::WIDTH, "x_c"}, {Axis::HEIGHT, "y_c"}, {Axis::DEPTH, "d_c"}, - {Axis::CHANNELS, "Z"}, {Axis::BATCH, "B"}, - }; - - std::map<Axis, std::string> axis_to_dst_coord = { - {Axis::WIDTH, "X"}, {Axis::HEIGHT, "Y"}, {Axis::DEPTH, "D"}, - {Axis::CHANNELS, "Z"}, {Axis::BATCH, "B"}, - }; - - std::vector<std::string> src_coords; - std::vector<std::string> dst_coords; - for (auto axis : {Axis::WIDTH, Axis::HEIGHT, Axis::DEPTH, Axis::CHANNELS}) - { - if (op_def.dst_tensors[0].HasAxis(axis)) - { - dst_coords.push_back(axis_to_dst_coord[axis]); - } - if (op_def.src_tensors[0].HasAxis(axis)) - { - src_coords.push_back(axis_to_src_coord[axis]); - } - } - std::string src_coord = src_coords[0]; - for (size_t i = 1; i < src_coords.size(); ++i) - { - src_coord += ", " + src_coords[i]; - } - std::string dst_coord = dst_coords[0]; - for (size_t i = 1; i < dst_coords.size(); ++i) - { - dst_coord += ", " + dst_coords[i]; - } - - std::string c = GetCommonDefines(op_def.precision); - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - c += " int X = get_global_id(0);\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) - { - c += " int linear_id_1 = get_global_id(1);\n"; - c += " int Y = linear_id_1 / args.dst_tensor.Depth();\n"; - c += " int D = linear_id_1 % args.dst_tensor.Depth();\n"; - } - else - { - c += " int Y = get_global_id(1);\n"; - } - c += " int Z = get_global_id(2);\n"; - c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || " - "Z >= args.dst_tensor.Slices()) { \n"; - c += " return; \n"; - c += " } \n"; - c += " FLT4 maximum = (FLT4)(-10000.0f);\n"; - if (output_indices) - { - c += " FLT4 indexes = (FLT4)(0.0f);\n"; - } - if (stride_correction) - { - c += " int xs = " + - GetXStrideCorrectedV2("X", "args.src_tensor.Batch()", "args.stride_x", "args.padding_x") + - ";\n"; - } - else - { - if (op_def.IsBatchSupported()) - { - c += " int xs = X * args.stride_x + args.padding_x * " - 
"args.src_tensor.Batch();\n"; - } - else - { - c += " int xs = X * args.stride_x + args.padding_x;\n"; - } - } - c += " int ys = Y * args.stride_y + args.padding_y;\n"; - c += " for (int ky = 0; ky < args.kernel_size_y; ++ky) {\n"; - c += " int y_c = ys + ky;\n"; - c += " if (y_c < 0 || y_c >= args.src_tensor.Height()) continue;\n"; - c += " for (int kx = 0; kx < args.kernel_size_x; ++kx) {\n"; - if (op_def.IsBatchSupported()) - { - c += " int x_c = xs + kx * args.src_tensor.Batch();\n"; - } - else - { - c += " int x_c = xs + kx;\n"; - } - c += " if (x_c < 0 || x_c >= args.src_tensor.Width()) continue;\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) - { - c += " int ds = D * args.stride_z + args.padding_z;\n"; - c += " for (int kz = 0; kz < args.kernel_size_z; ++kz) {\n"; - c += " int d_c = ds + kz;\n"; - c += " if (d_c < 0 || d_c >= args.src_tensor.Depth()) continue;\n"; - } - c += " FLT4 src = args.src_tensor.Read(" + src_coord + ");\n"; - if (output_indices) - { - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) - { - c += " FLT index_counter = (FLT)((ky * args.kernel_size_x + kx) * " - "args.kernel_size_z + kz) + (FLT)(0.1f);\n"; - } - else - { - c += " FLT index_counter = (FLT)(ky * args.kernel_size_x + kx) + " - "(FLT)(0.1f);\n"; - } - c += " if (src.x > maximum.x) {\n"; - c += " indexes.x = index_counter;\n"; - c += " maximum.x = src.x;\n"; - c += " }\n"; - c += " if (src.y > maximum.y) {\n"; - c += " indexes.y = index_counter;\n"; - c += " maximum.y = src.y;\n"; - c += " }\n"; - c += " if (src.z > maximum.z) {\n"; - c += " indexes.z = index_counter;\n"; - c += " maximum.z = src.z;\n"; - c += " }\n"; - c += " if (src.w > maximum.w) {\n"; - c += " indexes.w = index_counter;\n"; - c += " maximum.w = src.w;\n"; - c += " }\n"; - } - else - { - c += " maximum = max(src, maximum);\n"; - } - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) - { - c += " } // Depth\n"; - } - c += " }\n"; - c += " }\n"; - c += " args.dst_tensor.Write(maximum, " + dst_coord + ");\n"; - if (output_indices) - { - c += " args.dst_indices.Write(indexes, " + dst_coord + ");\n"; - } - c += "}\n"; - - return c; -} -} // namespace - -GPUOperation CreatePooling(const OperationDef &definition, const Pooling2DAttributes &attr) -{ - GPUOperation op(definition); - op.args_.AddInt("kernel_size_x", attr.kernel.w); - op.args_.AddInt("padding_x", -attr.padding.prepended.w); - op.args_.AddInt("stride_x", attr.strides.w); - op.args_.AddInt("kernel_size_y", attr.kernel.h); - op.args_.AddInt("padding_y", -attr.padding.prepended.h); - op.args_.AddInt("stride_y", attr.strides.h); - - const bool stride_correction = definition.IsBatchSupported() && attr.strides.w != 1; - if (attr.type == PoolingType::AVERAGE) - { - op.code_ = GetAveragePoolingKernelCode(definition, stride_correction, &op); - } - else if (attr.type == PoolingType::MAX) - { - op.code_ = GetMaxPoolingKernelCode(definition, stride_correction, attr.output_indices, &op); - } - op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ; - return op; -} - -GPUOperation CreatePooling(const OperationDef &definition, const Pooling3DAttributes &attr) -{ - GPUOperation op(definition); - op.args_.AddInt("kernel_size_x", attr.kernel.w); - op.args_.AddInt("padding_x", -attr.padding.prepended.w); - op.args_.AddInt("stride_x", attr.strides.w); - op.args_.AddInt("kernel_size_y", attr.kernel.h); - op.args_.AddInt("padding_y", -attr.padding.prepended.h); - op.args_.AddInt("stride_y", attr.strides.h); - op.args_.AddInt("kernel_size_z", attr.kernel.d); - op.args_.AddInt("padding_z", 
-attr.padding.prepended.d);
-  op.args_.AddInt("stride_z", attr.strides.d);
-  const bool stride_correction = definition.IsBatchSupported() && attr.strides.w != 1;
-  if (attr.type == PoolingType::AVERAGE)
-  {
-    op.code_ = GetAveragePoolingKernelCode(definition, stride_correction, &op);
-  }
-  else if (attr.type == PoolingType::MAX)
-  {
-    op.code_ = GetMaxPoolingKernelCode(definition, stride_correction, attr.output_indices, &op);
-  }
-  op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ;
-  return op;
-}
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Pooling.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Pooling.h
deleted file mode 100644
index 166d81591..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Pooling.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_POOLING_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_POOLING_H__
-
-#include "GpuOperation.h"
-
-#include "open_cl/Operations.h"
-#include "open_cl/Precision.h"
-#include "open_cl/ClKernel.h"
-#include "open_cl/Tensor.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-GPUOperation CreatePooling(const OperationDef &definition, const Pooling2DAttributes &attr);
-
-GPUOperation CreatePooling(const OperationDef &definition, const Pooling3DAttributes &attr);
-
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_POOLING_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Relu.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Relu.cc
deleted file mode 100644
index 37f87e599..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Relu.cc
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
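For orientation, a sketch of how the CreatePooling factories above are typically driven. The OperationDef is normally assembled by the operation-selection layer, so def is assumed here to be already populated with matching src/dst tensor descriptors; HW is assumed to be the height/width pair from open_cl/Shape.h:

Pooling2DAttributes attr;
attr.type = PoolingType::AVERAGE;
attr.kernel = HW(2, 2);            // becomes args kernel_size_y / kernel_size_x
attr.strides = HW(2, 2);           // becomes args stride_y / stride_x
attr.padding.prepended = HW(0, 0); // stored negated as padding_y / padding_x
attr.output_indices = false;       // second dst tensor, MAX pooling only

GPUOperation op = CreatePooling(def, attr);
// op.code_ now holds the generated OpenCL source, and tensor_to_grid_ is
// kWBToX_HDToY_SToZ, i.e. grid = (W*B, H*D, Slices) of the dst tensor.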
- */ - -#include "Relu.h" - -#include <string> -#include "Util.h" -#include "GpuOperation.h" -#include "absl/strings/str_cat.h" -#include "open_cl/Precision.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -GPUOperation CreateReLU(const OperationDef &definition, const ReLUAttributes &attr) -{ - GPUOperation op(definition); - op.elementwise_ = true; - - std::string min_func; - if (attr.alpha != 0.0f) - { - min_func = "min(in_out_value * args.alpha, (FLT)(0.0f))"; - if (definition.precision == CalculationsPrecision::F32) - { - op.args_.AddFloat("alpha", attr.alpha); - } - else - { -#ifdef FIXME_PORTING_HALF_REQIRED - op.args_.AddHalf("alpha", half(attr.alpha)); -#endif - } - } - else - { - min_func = "(FLT)(0.0f)"; - } - if (attr.clip != 0.0f) - { - if (definition.precision == CalculationsPrecision::F32) - { - op.args_.AddFloat("clip", attr.clip); - } - else - { -#ifdef FIXME_PORTING_HALF_REQIRED - op.args_.AddHalf("clip", half(attr.clip)); -#endif - } - op.code_ = absl::StrCat("in_out_value = clamp(in_out_value, " + min_func + ", args.clip);"); - } - else - { - op.code_ = absl::StrCat("in_out_value = max(in_out_value, ", min_func, ");"); - } - return op; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Relu.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Relu.h deleted file mode 100644 index eb6b1ad1d..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Relu.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_RELU_H__ -#define __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_RELU_H__ - -#include "open_cl/ClKernel.h" -#include "GpuOperation.h" -#include "open_cl/Precision.h" -#include "open_cl/Tensor.h" -#include "open_cl/Types.h" -#include "open_cl/Operations.h" -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -GPUOperation CreateReLU(const OperationDef &definition, const ReLUAttributes &attr); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_RELU_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshape.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshape.cc deleted file mode 100644 index cdd3e8364..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshape.cc +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
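Because CreateReLU above marks the operation as elementwise_, its entire contribution is the single in_out_value statement in code_, which the linking machinery splices into the consumer kernel. For illustration, the snippet it generates for three common attribute combinations (F32 precision, so alpha and clip are bound as float arguments):

// alpha == 0, clip == 0 (plain ReLU):
in_out_value = max(in_out_value, (FLT)(0.0f));
// alpha != 0, clip == 0 (leaky ReLU):
in_out_value = max(in_out_value, min(in_out_value * args.alpha, (FLT)(0.0f)));
// alpha == 0, clip != 0 (e.g. ReLU6):
in_out_value = clamp(in_out_value, (FLT)(0.0f), args.clip);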
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Reshape.h" - -#include <string> - -#include "Util.h" -#include "open_cl/Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ -std::string GetReshapeCode(const OperationDef &op_def) -{ - std::string c = GetCommonDefines(op_def.precision); - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::BATCH)) - { - c += " int linear_id = get_global_id(0);\n"; - c += " int X = linear_id / args.dst_tensor.Batch();\n"; - c += " int B = linear_id % args.dst_tensor.Batch();\n"; - c += " args.dst_tensor.SetBatchRef(B);\n"; - } - else - { - c += " int X = get_global_id(0);\n"; - } - c += " int Y = get_global_id(1);\n"; - c += " int Z = get_global_id(2);\n"; - c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || " - "Z >= args.dst_tensor.Slices()) { \n"; - c += " return; \n"; - c += " } \n"; - c += " FLT temps[4];\n"; - c += " temps[0] = (FLT)(0.0f);\n"; - c += " temps[1] = (FLT)(0.0f);\n"; - c += " temps[2] = (FLT)(0.0f);\n"; - c += " temps[3] = (FLT)(0.0f);\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::BATCH)) - { - c += " int base = B;\n"; - } - else - { - c += " int base = 0;\n"; - } - c += " base = ((base * args.dst_tensor.Height() + Y) * " - "args.dst_tensor.Width() + X) * args.dst_tensor.Channels() + Z * 4;\n"; - c += " for (int i = 0; i < 4; ++i) {\n"; - c += " int dst_channel = Z * 4 + i;\n"; - c += " if (dst_channel < args.dst_tensor.Channels()) {;\n"; - c += " int p = base + i;\n"; - c += " int src_c = p % args.src_tensor.Channels();\n"; - c += " p = p / args.src_tensor.Channels();\n"; - c += " int src_x = p % args.src_tensor.Width();\n"; - c += " p = p / args.src_tensor.Width();\n"; - c += " int src_y = p % args.src_tensor.Height();\n"; - if (op_def.src_tensors[0].HasAxis(Axis::BATCH)) - { - c += " int src_b = p / args.src_tensor.Height();\n"; - c += " args.src_tensor.SetBatchRef(src_b);\n"; - } - c += " int src_z = src_c / 4;\n"; - c += " int src_sub_ch = src_c % 4;\n"; - c += " FLT4 t = args.src_tensor.Read(src_x, src_y, src_z);\n"; - c += " FLT t_ar[4] = {t.x, t.y, t.z, t.w};\n"; - c += " temps[i] = t_ar[src_sub_ch];\n"; - c += " }\n"; - c += " }\n"; - c += " FLT4 result = (FLT4)(temps[0], temps[1], temps[2], temps[3]);\n"; - c += " args.dst_tensor.Write(result, X, Y, Z);\n"; - c += "}\n"; - return c; -} - -} // namespace - -GPUOperation CreateReshape(const OperationDef &definition) -{ - GPUOperation op(definition); - op.AddSrcTensor("src_tensor", definition.src_tensors[0]); - op.AddDstTensor("dst_tensor", definition.dst_tensors[0]); - op.code_ = GetReshapeCode(definition); - op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ; - return op; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshape.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshape.h deleted file mode 100644 index 4f7c5ea38..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshape.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_RESHAPE_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_RESHAPE_H__ - -#include "GpuOperation.h" - -#include "open_cl/Operations.h" -#include "open_cl/Precision.h" -#include "open_cl/ClKernel.h" -#include "open_cl/Tensor.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -GPUOperation CreateReshape(const OperationDef &definition); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_RESHAPE_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshapex4.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshapex4.cc deleted file mode 100644 index 13010e791..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshapex4.cc +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
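The generated Reshape kernel above works by flattening each destination element into a linear BHWC index (base + i) and unravelling that index back into source coordinates, which is what lets it reshape across arbitrary channel boundaries. A worked trace with small, hypothetical shapes:

// dst: H=1, W=2, C=6; element X=1, Y=0, Z=1, i=0 (dst channel 4):
//   base = ((0 * 1 + 0) * 2 + 1) * 6 + 1 * 4 = 10, so p = 10
// src: H=2, W=2, C=3:
//   src_c = 10 % 3 = 1;  p = 10 / 3 = 3
//   src_x = 3 % 2 = 1;   p = 3 / 2 = 1
//   src_y = 1 % 2 = 1
//   src_z = 1 / 4 = 0, src_sub_ch = 1  -> the kernel reads src(1, 1, slice 0).y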
- */ - -#include "Reshape.h" - -#include <string> - -#include "Util.h" -#include "open_cl/Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -std::string GetReshapeCode(const OperationDef &op_def) -{ - std::string c = GetCommonDefines(op_def.precision); - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::BATCH)) - { - c += " int linear_id = get_global_id(0);\n"; - c += " int X = linear_id / args.dst_tensor.Batch();\n"; - c += " int B = linear_id % args.dst_tensor.Batch();\n"; - c += " args.dst_tensor.SetBatchRef(B);\n"; - } - else - { - c += " int X = get_global_id(0);\n"; - } - c += " int Y = get_global_id(1);\n"; - c += " int Z = get_global_id(2);\n"; - c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || " - "Z >= args.dst_tensor.Slices()) { \n"; - c += " return; \n"; - c += " } \n"; - if (op_def.dst_tensors[0].HasAxis(Axis::BATCH)) - { - c += " int dst_bhwc4 = B;\n"; - } - else - { - c += " int dst_bhwc4 = 0;\n"; - } - c += " dst_bhwc4 = ((dst_bhwc4 * args.dst_tensor.Height() + Y) * " - "args.dst_tensor.Width() + X) * args.dst_tensor.Slices() + Z;\n"; - c += " int src_z = dst_bhwc4 % args.src_tensor.Slices();\n"; - c += " dst_bhwc4 = dst_bhwc4 / args.src_tensor.Slices();\n"; - c += " int src_x = dst_bhwc4 % args.src_tensor.Width();\n"; - c += " dst_bhwc4 = dst_bhwc4 / args.src_tensor.Width();\n"; - c += " int src_y = dst_bhwc4 % args.src_tensor.Height();\n"; - if (op_def.src_tensors[0].HasAxis(Axis::BATCH)) - { - c += " int src_b = dst_bhwc4 / args.src_tensor.Height();\n"; - c += " args.src_tensor.SetBatchRef(src_b);\n"; - } - c += " FLT4 result = args.src_tensor.Read(src_x, src_y, src_z);\n"; - c += " args.dst_tensor.Write(result, X, Y, Z);\n"; - c += "}\n"; - return c; -} - -} // namespace - -GPUOperation CreateReshapex4(const OperationDef &definition) -{ - GPUOperation op(definition); - op.AddSrcTensor("src_tensor", definition.src_tensors[0]); - op.AddDstTensor("dst_tensor", definition.dst_tensors[0]); - op.code_ = GetReshapeCode(definition); - op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ; - return op; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshapex4.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshapex4.h deleted file mode 100644 index 8988e8bd4..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Reshapex4.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
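The Reshapex4 variant above copies whole FLT4 slices and never splits a slice across channels, so it is only correct when both channel counts are multiples of 4 (see the header that follows). A plausible host-side dispatch between the two kernels, as a sketch (the helper name SelectReshape is hypothetical):

GPUOperation SelectReshape(int src_channels, int dst_channels, const OperationDef &def)
{
  // Slice-wise copy is valid only if no FLT4 slice straddles a reshape
  // boundary, i.e. both tensors have channel counts aligned to 4.
  if (src_channels % 4 == 0 && dst_channels % 4 == 0)
  {
    return CreateReshapex4(def);
  }
  // General per-channel path from Reshape.cc (slower but always correct).
  return CreateReshape(def);
}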
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_RESHAPEX4_H__ -#define __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_RESHAPEX4_H__ - -#include "GpuOperation.h" - -#include "open_cl/Operations.h" -#include "open_cl/Precision.h" -#include "open_cl/ClKernel.h" -#include "open_cl/Tensor.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -// More optimized, but require src_channels % 4 == 0 and dst_channels % 4 == 0 -GPUOperation CreateReshapex4(const OperationDef &definition); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_RESHAPEX4_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax.cc deleted file mode 100644 index 4ee164d82..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax.cc +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Softmax.h" - -#include <string> - -#include "Util.h" -#include "WorkGroupPicking.h" -#include "GpuOperation.h" -#include "open_cl/Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -namespace -{ -std::string GetSoftmaxKernelCode(const OperationDef &op_def) -{ - std::string c = GetCommonDefines(op_def.precision); - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - c += " int X = get_global_id(0);\n"; - c += " int Y = get_global_id(1);\n"; - c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height()) " - "return; \n"; - c += " float sum = 0.0f;\n"; - c += " for (int d = 0; d < args.dst_tensor.Slices(); ++d) {\n"; - c += " float4 t = args.src_tensor.Read<float>(X, Y, d);\n"; - c += " sum += exp(t.x);\n"; - c += " if (d * 4 + 1 < args.dst_tensor.Channels()) sum += exp(t.y);\n"; - c += " if (d * 4 + 2 < args.dst_tensor.Channels()) sum += exp(t.z);\n"; - c += " if (d * 4 + 3 < args.dst_tensor.Channels()) sum += exp(t.w);\n"; - c += " }\n"; - c += " for (int d = 0; d < args.dst_tensor.Slices(); ++d) {\n"; - c += " float4 t = args.src_tensor.Read<float>(X, Y, d);\n"; - c += " t = exp(t) / sum;\n"; - c += " FLT4 result = TO_FLT4(t);\n"; - c += " args.dst_tensor.Write(result, X, Y, d);\n"; - c += " }\n"; - c += "}\n"; - return c; -} -} // namespace - -GPUOperation CreateSoftmax(const OperationDef &definition) -{ - GPUOperation op(definition); - auto src_desc = definition.src_tensors[0]; - if (definition.IsBatchSupported()) - { - src_desc.SetStateVar("BatchedWidth", "true"); - } - op.AddSrcTensor("src_tensor", src_desc); - auto dst_desc = definition.dst_tensors[0]; - if (definition.IsBatchSupported()) - { - dst_desc.SetStateVar("BatchedWidth", "true"); - } - op.AddDstTensor("dst_tensor", dst_desc); - op.code_ = GetSoftmaxKernelCode(definition); - op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_ZIs1; - return op; -} - -} // namespace gpu_cl -} // 
namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax.h deleted file mode 100644 index 594bab042..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_SOFTMAX_H__ -#define __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_SOFTMAX_H__ - -#include "open_cl/ClKernel.h" -#include "GpuOperation.h" -#include "open_cl/Precision.h" -#include "open_cl/Tensor.h" -#include "open_cl/Types.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -GPUOperation CreateSoftmax(const OperationDef &definition); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_SOFTMAX_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax1x1.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax1x1.cc deleted file mode 100644 index 590952dca..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax1x1.cc +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
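The softmax kernel above makes two passes over the channel slices: one to accumulate the denominator, one to normalize. The per-lane guards exist because channels are packed four to a slice, so the last slice may contain unused lanes. A quick trace with 6 channels (Slices() == 2), for illustration:

// d == 0: lanes x..w are channels 0..3 -> all four exp() terms are added.
// d == 1: lane x is channel 4 (added unconditionally), lane y is channel 5
//         (added because 1*4+1 = 5 < 6); lanes z and w fail the guard.
// The second pass divides every lane by the same sum; the unused lanes of the
// last slice hold garbage after the division, but are never read back.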
- */ - -#include "Softmax1x1.h" - -#include <string> - -#include "Util.h" -#include "open_cl/Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -Softmax1x1::Softmax1x1(const OperationDef &definition) : GPUOperation(definition) -{ - work_group_size_ = int3(32, 1, 1); - code_ = GetSoftmaxKernelCode(definition_); -} - -Softmax1x1::Softmax1x1(Softmax1x1 &&kernel) : GPUOperation(std::move(kernel)) {} - -Softmax1x1 &Softmax1x1::operator=(Softmax1x1 &&kernel) -{ - if (this != &kernel) - { - GPUOperation::operator=(std::move(kernel)); - } - return *this; -} - -std::string Softmax1x1::GetSoftmaxKernelCode(const OperationDef &op_def) -{ - AddSrcTensor("src_tensor", op_def.src_tensors[0]); - AddDstTensor("dst_tensor", op_def.dst_tensors[0]); - args_.AddFloat("mask_x"); - args_.AddFloat("mask_y"); - args_.AddFloat("mask_z"); - args_.AddFloat("mask_w"); - args_.AddInt("slices_x32"); - - std::string c = GetCommonDefines(op_def.precision); - c += "__kernel void main_function(\n"; - c += "$0) {\n"; - if (op_def.IsBatchSupported()) - { - c += " int batch_id = get_global_id(1);\n"; - c += " if (batch_id >= args.dst_tensor.Batch()) return;\n"; - c += " args.dst_tensor.SetBatchRef(batch_id);\n"; - c += " args.src_tensor.SetBatchRef(batch_id);\n"; - } - c += " float4 mask = (float4)(args.mask_x, args.mask_y, args.mask_z, " - "args.mask_w);\n"; - c += " int offset = 0;\n"; - c += " float sum = 0.0f;\n"; - c += " int s = 0;\n"; - c += " int tid = get_local_id(0);\n"; - c += " do {\n"; - c += " int z = offset + tid;\n"; - c += " if (z < args.dst_tensor.Slices()) {\n"; - c += " float4 mask_temp = z == args.dst_tensor.Slices() - 1 ? mask : " - "(float4)(1.0f);\n"; - c += " float4 src = args.src_tensor.Read<float>(0, 0, z);\n"; - c += " sum += dot(mask_temp, exp(src));\n"; - c += " offset += 32;\n"; - c += " }\n"; - c += " s++;\n"; - c += " } while (s < args.slices_x32);\n"; - c += "\n"; - c += " __local float4 tmp[8];\n"; - c += " __local float* tmpx1 = (__local float*)tmp;\n"; - c += " tmpx1[tid] = sum;\n"; - c += " barrier(CLK_LOCAL_MEM_FENCE);\n"; - c += " if (tid == 0) {\n"; - c += " sum = dot((float4)(1.0f), tmp[0]);\n"; - c += " sum += dot((float4)(1.0f), tmp[1]);\n"; - c += " sum += dot((float4)(1.0f), tmp[2]);\n"; - c += " sum += dot((float4)(1.0f), tmp[3]);\n"; - c += " sum += dot((float4)(1.0f), tmp[4]);\n"; - c += " sum += dot((float4)(1.0f), tmp[5]);\n"; - c += " sum += dot((float4)(1.0f), tmp[6]);\n"; - c += " sum += dot((float4)(1.0f), tmp[7]);\n"; - c += " tmpx1[0] = 1.0f / sum;\n"; - c += " }\n"; - c += " barrier(CLK_LOCAL_MEM_FENCE);\n"; - c += " sum = tmpx1[0];\n"; - c += "\n"; - c += " offset = 0;\n"; - c += " s = 0;\n"; - c += " do {\n"; - c += " int z = offset + tid;\n"; - c += " if (z < args.dst_tensor.Slices()) {\n"; - c += " FLT4 res = TO_FLT4(exp(args.src_tensor.Read<float>(0, 0, " - "z))*sum);\n"; - c += " args.dst_tensor.Write(res, 0, 0, z);\n"; - c += " offset += 32;\n"; - c += " }\n"; - c += " s++;\n"; - c += " } while (s < args.slices_x32);\n"; - c += "}\n"; - return c; -} - -absl::Status Softmax1x1::BindArguments(ArgumentsBinder *args) -{ - float4 mask = GetMaskForLastPlane(src_[0]->Channels()); - RETURN_IF_ERROR(args->SetFloat("mask_x", mask.x)); - RETURN_IF_ERROR(args->SetFloat("mask_y", mask.y)); - RETURN_IF_ERROR(args->SetFloat("mask_z", mask.z)); - RETURN_IF_ERROR(args->SetFloat("mask_w", mask.w)); - RETURN_IF_ERROR(args->SetInt("slices_x32", DivideRoundUp(src_[0]->Slices(), 32))); - return absl::OkStatus(); -} - -int3 Softmax1x1::GetGridSize() const { 
return int3(32, dst_[0]->Batch(), 1); } - -Softmax1x1 CreateSoftmax1x1(const OperationDef &definition) { return Softmax1x1(definition); } - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax1x1.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax1x1.h deleted file mode 100644 index da375d457..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Softmax1x1.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_SOFTMAX1X1_H__ -#define __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_SOFTMAX1X1_H__ - -#include "GpuOperation.h" - -#include "open_cl/Precision.h" -#include "open_cl/ClKernel.h" -#include "open_cl/Tensor.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -class Softmax1x1 : public GPUOperation -{ -public: - Softmax1x1() = default; - explicit Softmax1x1(const OperationDef &definition); - - absl::Status BindArguments(ArgumentsBinder *args) override; - int3 GetGridSize() const override; - - // Move only - Softmax1x1(Softmax1x1 &&kernel); - Softmax1x1 &operator=(Softmax1x1 &&kernel); - Softmax1x1(const Softmax1x1 &) = delete; - Softmax1x1 &operator=(const Softmax1x1 &) = delete; - - friend Softmax1x1 CreateSoftmax1x1(); - -private: - std::string GetSoftmaxKernelCode(const OperationDef &op_def); -}; - -Softmax1x1 CreateSoftmax1x1(const OperationDef &definition); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPEN_CL_KERNELS_SOFTMAX1X1_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/TuningParameters.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/TuningParameters.h deleted file mode 100644 index 3d99b4fda..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/TuningParameters.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
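To make BindArguments and GetGridSize above concrete, here is what gets bound for a hypothetical 1x1 feature map with 70 channels and batch 1:

// Channels() == 70 -> Slices() == DivideRoundUp(70, 4) == 18
// mask       == GetMaskForLastPlane(70) == (1, 1, 0, 0)  // 70 % 4 == 2 live lanes
// slices_x32 == DivideRoundUp(18, 32) == 1               // one pass of the do-loop
// grid       == (32, Batch() == 1, 1) with work_group_size_ (32, 1, 1): the 32
// threads each handle slices z = tid, tid + 32, ... and reduce via __local tmp[8].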
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_KERNELS_TUNING_PARAMETERS_H__ -#define __ONERT_BACKEND_GPU_CL_KERNELS_TUNING_PARAMETERS_H__ - -#include "open_cl/ClCommandQueue.h" -#include "open_cl/DeviceInfo.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -enum class TuningType -{ - EXHAUSTIVE, - FAST -}; - -struct TuningParameters -{ - ProfilingCommandQueue *queue; - const DeviceInfo *info; - TuningType tuning_type = TuningType::EXHAUSTIVE; -}; - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_KERNELS_TUNING_PARAMETERS_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/Util.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/Util.cc deleted file mode 100644 index df42c66e8..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Util.cc +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "Util.h" - -#include <cfloat> -#include <cmath> -#include <string> -#include <vector> - -#include "absl/strings/str_cat.h" -#include "absl/strings/substitute.h" -#include "open_cl/Precision.h" -#include "open_cl/DataType.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -std::string GetCommonDefines(CalculationsPrecision precision) -{ - std::string result; - - switch (precision) - { - case CalculationsPrecision::F32: - result += "#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n"; - result += "#define ACCUM_FLT4 float4\n"; - result += "#define FLT float\n"; - result += "#define FLT2 float2\n"; - result += "#define FLT3 float3\n"; - result += "#define FLT4 float4\n"; - result += "#define TO_FLT4 convert_float4\n"; - result += "#define TO_ACCUM_TYPE convert_float4\n"; - result += "#define TO_ACCUM_FLT convert_float\n"; - break; - case CalculationsPrecision::F16: - result += "#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n"; - result += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; - result += "#define ACCUM_FLT4 half4\n"; - result += "#define FLT half\n"; - result += "#define FLT2 half2\n"; - result += "#define FLT3 half3\n"; - result += "#define FLT4 half4\n"; - result += "#define TO_FLT4 convert_half4\n"; - result += "#define TO_ACCUM_TYPE convert_half4\n"; - result += "#define TO_ACCUM_FLT convert_half\n"; - break; - case CalculationsPrecision::F32_F16: - result += "#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n"; - result += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; - result += "#define ACCUM_FLT4 float4\n"; - result += "#define FLT half\n"; - result += "#define FLT2 half2\n"; - result += "#define FLT3 half3\n"; - result += "#define FLT4 half4\n"; - result += "#define TO_FLT4 convert_half4\n"; - result += "#define TO_ACCUM_TYPE convert_float4\n"; - result += "#define TO_ACCUM_FLT convert_float\n"; - break; - } - return result; -} - -std::string 
GetXStrideCorrectedV2(const std::string &src_x, const std::string &batch_size, - const std::string &stride_x, const std::string &padding_x) -{ - // int p0 = src_x / batch_size;\n"; - // int b0 = src_x % batch_size;\n"; - // return (p0 * stride_x + padding_x) * batch_size + b0;\n"; - return absl::Substitute("(((($0) / $1) * $2 + $3) * $1 + ($0) % $1)", src_x, batch_size, stride_x, - padding_x); -} - -float4 GetMaskForLastPlane(int channels) -{ - float4 mask = float4(0.0f); - const int reminder = channels % 4 == 0 ? 4 : channels % 4; - for (int i = 0; i < reminder; ++i) - { - mask[i] = 1.0f; - } - return mask; -} - -int3 GetFirstSuitableWorkGroup(const std::vector<int3> &wgs, int max_wg_size) -{ - for (const auto &wg : wgs) - { - const int wg_size = wg.x * wg.y * wg.z; - if (wg_size <= max_wg_size) - { - return wg; - } - } - return {1, 1, 1}; -} - -int GetRecommendedBlockSizeForConv(const DeviceInfo &device_info, CalculationsPrecision precision, - int task_size) -{ - const float task_size_per_cu = task_size / static_cast<float>(device_info.compute_units_count); - int block_size = 1; - float threshold_1 = FLT_MAX; - float threshold_2 = FLT_MAX; - float threshold_4 = FLT_MAX; - if (!device_info.IsMali()) - { - return 1; - } - MaliInfo mali_info = device_info.mali_info; - switch (precision) - { - case CalculationsPrecision::F16: - if (mali_info.IsBifrostGen1()) - { - threshold_1 = 256.0f; - threshold_2 = 256.0f * 4.0f; - threshold_4 = 256.0f * 8.0f; - } - else if (mali_info.IsBifrostGen2()) - { - threshold_1 = 256.0f * 2.0f; - threshold_2 = 256.0f * 8.0f; - threshold_4 = 256.0f * 16.0f; - } - else if (mali_info.IsBifrostGen3() || mali_info.IsValhall()) - { - threshold_1 = 256.0f; - threshold_2 = 256.0f * 6.0f; - threshold_4 = 256.0f * 16.0f; - } - else if (mali_info.IsMidgard()) - { - threshold_1 = 256.0f * 4.0f; - threshold_2 = 256.0f * 16.0f; - } - break; - case CalculationsPrecision::F32_F16: - if (mali_info.IsBifrostGen1()) - { - threshold_1 = 256.0f; - threshold_2 = 256.0f * 3.0f; - threshold_4 = 256.0f * 32.0f; - } - else if (mali_info.IsBifrostGen2()) - { - threshold_1 = 256.0f * 2.0f; - threshold_2 = 256.0f * 8.0f; - } - else if (mali_info.IsBifrostGen3() || mali_info.IsValhall()) - { - threshold_1 = 256.0f; - threshold_2 = 256.0f * 8.0f; - } - else if (mali_info.IsMidgard()) - { - threshold_1 = 256.0f * 4.0f; - } - break; - case CalculationsPrecision::F32: - if (mali_info.IsBifrostGen1()) - { - threshold_1 = 256.0f; - threshold_2 = 256.0f * 4.0f; - } - else if (mali_info.IsBifrostGen2()) - { - threshold_1 = 128.0f; - threshold_2 = 256.0f * 4.0f; - } - else if (mali_info.IsBifrostGen3() || mali_info.IsValhall()) - { - threshold_1 = 256.0f; - threshold_2 = 256.0f * 12.0f; - } - else if (mali_info.IsMidgard()) - { - threshold_1 = 256.0f * 16.0f; - } - break; - } - if (task_size_per_cu <= threshold_1) - { - block_size = 1; - } - else if (task_size_per_cu <= threshold_2) - { - block_size = 2; - } - else if (task_size_per_cu <= threshold_4) - { - block_size = 4; - } - else - { - block_size = 8; - } - return block_size; -} - -int3 GetWorkGroupsCount(const int3 &grid_size, const int3 &work_group_size) -{ - int3 work_groups_count; - work_groups_count.x = DivideRoundUp(grid_size.x, work_group_size.x); - work_groups_count.y = DivideRoundUp(grid_size.y, work_group_size.y); - work_groups_count.z = DivideRoundUp(grid_size.z, work_group_size.z); - return work_groups_count; -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git 
a/runtime/onert/backend/gpu_cl/open_cl/kernels/Util.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/Util.h
deleted file mode 100644
index 8363862c1..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/Util.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_UTIL_H__
-#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_UTIL_H__
-
-#include <string>
-#include <vector>
-
-#include "open_cl/DeviceInfo.h"
-#include "open_cl/Precision.h"
-#include "open_cl/DataType.h"
-#include "open_cl/Shape.h"
-#include "open_cl/Tensor.h"
-#include "open_cl/Types.h"
-#include "open_cl/Util.h"
-
-namespace onert
-{
-namespace backend
-{
-namespace gpu_cl
-{
-
-std::string GetCommonDefines(CalculationsPrecision precision);
-
-// Calculates the correct X coordinate when stride != 1 and batch != 1 for
-// layouts with B after W (for example HWBC4), where W and B are stored in one
-// axis of the GPU resources.
-std::string GetXStrideCorrected(const std::string &src_x, const std::string &batch_size,
-                                const std::string &stride_x, const std::string &padding_x);
-
-// Calculates the correct X coordinate when stride != 1 and batch != 1 for
-// layouts with B after W (for example HWBC4), where W and B are stored in one
-// axis of the GPU resources.
-std::string GetXStrideCorrectedV2(const std::string &src_x, const std::string &batch_size,
                                   const std::string &stride_x, const std::string &padding_x);
-
-// Returns a float4 mask for the last plane (a batch of 4 channels), assuming
-// the plane size is 4. For example, with 7 channels the data is aligned to 8,
-// but the 8th channel is empty, so the last plane gets the mask (1, 1, 1, 0).
-float4 GetMaskForLastPlane(int channels);
-
-// Returns the first work group in wgs whose size does not exceed max_wg_size;
-// if no group in wgs is suitable, returns {1, 1, 1}.
-int3 GetFirstSuitableWorkGroup(const std::vector<int3> &wgs, int max_wg_size);
-
-// task_size is the number of FLT4 elements to process.
-int GetRecommendedBlockSizeForConv(const DeviceInfo &device, CalculationsPrecision precision,
-                                   int task_size);
-
-int3 GetWorkGroupsCount(const int3 &grid_size, const int3 &work_group_size);
-} // namespace gpu_cl
-} // namespace backend
-} // namespace onert
-
-#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_UTIL_H__
diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/WorkGroupPicking.cc b/runtime/onert/backend/gpu_cl/open_cl/kernels/WorkGroupPicking.cc
deleted file mode 100644
index 214fec271..000000000
--- a/runtime/onert/backend/gpu_cl/open_cl/kernels/WorkGroupPicking.cc
+++ /dev/null
@@ -1,348 +0,0 @@
-/*
- * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright 2019 The TensorFlow Authors. All Rights Reserved.
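To make the substitution template in Util.cc above concrete: the pooling kernels call GetXStrideCorrectedV2("X", "args.src_tensor.Batch()", "args.stride_x", "args.padding_x"), which expands to the OpenCL expression below. It splits the fused width/batch coordinate apart, applies stride and padding to the spatial part only, then re-interleaves the batch:

((((X) / args.src_tensor.Batch()) * args.stride_x + args.padding_x)
  * args.src_tensor.Batch() + (X) % args.src_tensor.Batch())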
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "WorkGroupPicking.h" - -#include <algorithm> -#include <limits> -#include <set> -#include <vector> - -#include "open_cl/Util.h" -#include "open_cl/Types.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -namespace -{ - -std::vector<int2> Get2DWorkgroupsEqualTo128() -{ - return {{128, 1}, {64, 2}, {32, 4}, {16, 8}, {8, 16}, {4, 32}, {2, 64}, {1, 128}}; -} - -std::vector<int3> GenerateWorkGroupSizesXYMultipleOf(int multiplier, int3 grid, - const KernelInfo &kernel_info, - const DeviceInfo &device_info, - WorkGroupSizeAlignment z_alignment) -{ - std::vector<int3> work_groups; - work_groups.reserve(32); - - std::vector<int> possible_z_sizes = GetPossibleSizes(grid.z, z_alignment); - - for (int x = 1; x <= kernel_info.max_work_group_size; x *= 2) - { - for (int y = 1; y <= kernel_info.max_work_group_size; y *= 2) - { - int work_group_size_xy = x * y; - if (work_group_size_xy % multiplier != 0 || - work_group_size_xy > kernel_info.max_work_group_size) - { - continue; - } - for (auto z : possible_z_sizes) - { - if (work_group_size_xy * z > kernel_info.max_work_group_size) - { - continue; - } - if (x <= device_info.max_work_group_size_x && y <= device_info.max_work_group_size_y && - z <= device_info.max_work_group_size_z) - { - work_groups.push_back({x, y, z}); - } - } - } - } - return work_groups; -} - -std::vector<int3> GenerateWorkGroupSizesXMultipleOf(int multiplier, int3 grid, - const KernelInfo &kernel_info, - const DeviceInfo &device_info, - WorkGroupSizeAlignment z_alignment) -{ - std::vector<int3> work_groups; - work_groups.reserve(32); - - std::vector<int> possible_z_sizes = GetPossibleSizes(grid.z, z_alignment); - std::vector<int> possible_y_sizes = GetPossibleSizes(grid.y, WorkGroupSizeAlignment::PRECISE); - - for (int x = multiplier; x <= kernel_info.max_work_group_size && x < grid.x + multiplier; - x += multiplier) - { - for (auto y : possible_y_sizes) - { - for (auto z : possible_z_sizes) - { - if (x <= device_info.max_work_group_size_x && y <= device_info.max_work_group_size_y && - z <= device_info.max_work_group_size_z && x * y * z <= kernel_info.max_work_group_size) - { - work_groups.push_back({x, y, z}); - } - } - } - } - return work_groups; -} - -void GetWorkGroupsAlignedToGrid(const DeviceInfo &device_info, const KernelInfo &kernel_info, - const int3 &grid, std::vector<int3> *work_groups) -{ - int3 max_wg_size; - max_wg_size.x = device_info.max_work_group_size_x; - max_wg_size.y = device_info.max_work_group_size_y; - max_wg_size.z = device_info.max_work_group_size_z; - GenerateWorkGroupSizesAlignedToGrid(grid, max_wg_size, kernel_info.max_work_group_size, - work_groups); -} - -int GetPenalty(int grid_size, int group_size) -{ - const int reminder = grid_size % group_size; - return reminder == 0 ? 
0 : group_size - reminder; -} - -int GetPenalty(int2 grid_size, int2 group_size) -{ - const int p_x = GetPenalty(grid_size.x, group_size.x); - const int p_y = GetPenalty(grid_size.y, group_size.y); - return p_x * grid_size.y + p_y * grid_size.x + p_x * p_y; -} - -int GetMaxSizeWithMinPenalty(int size, int max_size) -{ - int best_size = 128; - int min_penalty = GetPenalty(size, best_size); - for (int i = 2; i * 128 <= max_size; ++i) - { - if (GetPenalty(size, i * 128) == min_penalty) - { - best_size = i * 128; - } - } - return best_size; -} - -int2 GetMaxSizeWithMinPenalty(int2 size, int max_size) -{ - std::vector<int2> base_groups = Get2DWorkgroupsEqualTo128(); - int min_penalty = std::numeric_limits<int>::max(); - for (const auto &group : base_groups) - { - min_penalty = std::min(GetPenalty(size, group), min_penalty); - } - for (const auto &group : base_groups) - { - for (int y = 1; y * group.y <= max_size; ++y) - { - int new_group_y = y * group.y; - for (int x = 1; x * group.x <= max_size; ++x) - { - int new_group_x = x * group.x; - if (new_group_x * new_group_y > max_size) - { - break; - } - if (GetPenalty(size, int2(new_group_x, new_group_y)) == min_penalty) - { - return int2(new_group_x, new_group_y); - } - } - } - } - return int2(0, 0); -} - -int GetBiggestDividerWithPriority(int number, int max_divider) -{ - if (number % 8 == 0 && 8 <= max_divider) - { - return 8; - } - if (number % 4 == 0 && 4 <= max_divider) - { - return 4; - } - if (number % 2 == 0 && 2 <= max_divider) - { - return 2; - } - for (int i = max_divider; i != 0; i--) - { - if (number % i == 0) - { - return i; - } - } - return 1; -} - -int GetBiggestDivider(int number, int max_divider) -{ - for (int i = max_divider; i != 0; i--) - { - if (number % i == 0) - { - return i; - } - } - return 1; -} - -} // namespace - -int3 GetWorkGroupXY128ConvLinear(const int3 &grid) -{ - int grid_z = GetBiggestDividerWithPriority(grid.z, 4); - if (grid.x <= 128) - { - return int3(128, 1, grid_z); - } - int grid_x = GetMaxSizeWithMinPenalty(grid.x, 512 / grid_z); - return {grid_x, 1, grid_z}; -} - -int3 GetWorkGroupXY128Conv(const int3 &grid) -{ - int grid_z = GetBiggestDividerWithPriority(grid.z, 4); - if (grid.x <= 16 && grid.y <= 8) - { - return int3(16, 8, grid_z); - } - int2 grid_xy = GetMaxSizeWithMinPenalty(int2(grid.x, grid.y), 512 / grid_z); - return int3(grid_xy.x, grid_xy.y, grid_z); -} - -// int3 GetWorkGroupXY128Simple(const int3& grid) { return int3(16, 8, 1); } - -int3 GetWorkGroup(const int3 &grid, int max_size) -{ - int wg_z = GetBiggestDividerWithPriority(grid.z, 8); - int wg_xy_size = max_size / wg_z; - int wg_x = std::min(DivideRoundUp(grid.x, 2), wg_xy_size); - int wg_y = std::min(wg_xy_size / wg_x, grid.y); - return int3(wg_x, wg_y, wg_z); -} - -int3 GetWorkGroupConv(const int3 &grid, int max_size, int max_z_size) -{ - int wg_z = GetBiggestDivider(grid.z, max_z_size); - int wg_xy_size = std::min(256, max_size) / wg_z; - int wg_x = std::min(grid.x, wg_xy_size); - int wg_y = std::min(wg_xy_size / wg_x, grid.y); - if (wg_y == grid.y && grid.y % 2 == 0) - { - wg_y = grid.y / 2; - } - return int3(wg_x, wg_y, wg_z); -} - -void GetPossibleWorkGroupsXYMultipleOf(int multiplier, const DeviceInfo &device_info, - const KernelInfo &kernel_info, const int3 &grid, - WorkGroupSizeAlignment z_alignment, - std::vector<int3> *work_groups) -{ - *work_groups = - GenerateWorkGroupSizesXYMultipleOf(multiplier, grid, kernel_info, device_info, z_alignment); -} - -void GetPossibleWorkGroupsXMultipleOf(int multiplier, const DeviceInfo 
&device_info, - const KernelInfo &kernel_info, const int3 &grid, - WorkGroupSizeAlignment z_alignment, - std::vector<int3> *work_groups) -{ - *work_groups = - GenerateWorkGroupSizesXMultipleOf(multiplier, grid, kernel_info, device_info, z_alignment); -} - -bool XY128RequiresMoreWorkGroupsThenXY128Linear(int width, int height) -{ - int planar_work_groups = DivideRoundUp(width * height, 128); - auto base_work_groups = Get2DWorkgroupsEqualTo128(); - bool have_equal_work_groups = false; - for (auto &work_group : base_work_groups) - { - int x_groups = DivideRoundUp(width, work_group.x); - int y_groups = DivideRoundUp(height, work_group.y); - int xy_groups = x_groups * y_groups; - if (xy_groups == planar_work_groups) - { - have_equal_work_groups = true; - break; - } - } - return !have_equal_work_groups; -} - -void GetPossibleWorkGroups(TuningType tuning_type, const DeviceInfo &device_info, - const KernelInfo &kernel_info, const int3 &grid, - std::vector<int3> *work_groups) -{ - switch (tuning_type) - { - case TuningType::FAST: - work_groups->push_back(GetWorkGroup(grid, kernel_info.max_work_group_size)); - return; - case TuningType::EXHAUSTIVE: - { - GetWorkGroupsAlignedToGrid(device_info, kernel_info, grid, work_groups); - return; - } - default: - work_groups->push_back({8, 4, 1}); - return; - } -} - -void GetPossibleWorkGroupsConv(TuningType tuning_type, const DeviceInfo &device_info, - const KernelInfo &kernel_info, const int3 &grid, - std::vector<int3> *work_groups) -{ - switch (tuning_type) - { - case TuningType::FAST: - { - int max_z_size = 16; - if (device_info.IsAdreno()) - { - max_z_size = device_info.IsAdreno3xx() ? 16 : 64; - } - max_z_size = std::min(max_z_size, device_info.max_work_group_size_z); - work_groups->push_back(GetWorkGroupConv(grid, kernel_info.max_work_group_size, max_z_size)); - return; - } - case TuningType::EXHAUSTIVE: - { - GetWorkGroupsAlignedToGrid(device_info, kernel_info, grid, work_groups); - return; - } - default: - work_groups->push_back({8, 4, 1}); - return; - } -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/kernels/WorkGroupPicking.h b/runtime/onert/backend/gpu_cl/open_cl/kernels/WorkGroupPicking.h deleted file mode 100644 index c19890de1..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/kernels/WorkGroupPicking.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
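As a concrete trace of the FAST tuning path above, take a grid of (100, 56, 24) and max_work_group_size 256 (hypothetical but typical values); GetWorkGroup then evaluates to:

// wg_z       = GetBiggestDividerWithPriority(24, 8) = 8    // 24 % 8 == 0
// wg_xy_size = 256 / 8 = 32
// wg_x       = min(DivideRoundUp(100, 2), 32) = min(50, 32) = 32
// wg_y       = min(32 / 32, 56) = 1
// result: work group (32, 1, 8), exactly 256 threads per group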
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_WORK_GROUP_PICKING_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_WORK_GROUP_PICKING_H__ - -#include <vector> - -#include "TuningParameters.h" - -#include "open_cl/ClKernel.h" -#include "open_cl/DeviceInfo.h" -#include "open_cl/Types.h" -#include "open_cl/WorkgroupSelection.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -// multiplier must be a power of two -void GetPossibleWorkGroupsXYMultipleOf(int multiplier, const DeviceInfo &device_info, - const KernelInfo &kernel_info, const int3 &grid, - WorkGroupSizeAlignment z_alignment, - std::vector<int3> *work_groups); - -void GetPossibleWorkGroupsXMultipleOf(int multiplier, const DeviceInfo &device_info, - const KernelInfo &kernel_info, const int3 &grid, - WorkGroupSizeAlignment z_alignment, - std::vector<int3> *work_groups); - -int3 GetWorkGroupXY128ConvLinear(const int3 &grid); - -int3 GetWorkGroupXY128Simple(const int3 &grid); -int3 GetWorkGroupXY128Conv(const int3 &grid); - -bool XY128RequiresMoreWorkGroupsThenXY128Linear(int width, int height); - -void GetPossibleWorkGroups(TuningType tuning_type, const DeviceInfo &device_info, - const KernelInfo &kernel_info, const int3 &grid, - std::vector<int3> *work_groups); - -void GetPossibleWorkGroupsConv(TuningType tuning_type, const DeviceInfo &device_info, - const KernelInfo &kernel_info, const int3 &grid, - std::vector<int3> *work_groups); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_KERNELS_WORK_GROUP_PICKING_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/selectors/ConvolutionSelector.cc b/runtime/onert/backend/gpu_cl/open_cl/selectors/ConvolutionSelector.cc deleted file mode 100644 index eac6f3270..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/selectors/ConvolutionSelector.cc +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
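
For callers, the header above is a fill-a-vector API: FAST appends one heuristic choice, EXHAUSTIVE appends every grid-aligned candidate for later on-device timing. A hedged usage sketch (it assumes the deleted gpu_cl headers are on the include path; the grid value is made up):

    #include <vector>
    #include "open_cl/kernels/WorkGroupPicking.h"

    using namespace onert::backend::gpu_cl;

    std::vector<int3> PickCandidates(const DeviceInfo &device_info, const KernelInfo &kernel_info)
    {
      const int3 grid(128, 64, 16); // hypothetical dispatch grid
      std::vector<int3> work_groups;
      GetPossibleWorkGroups(TuningType::FAST, device_info, kernel_info, grid, &work_groups);
      return work_groups; // one candidate for FAST, many for EXHAUSTIVE
    }
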
- */ -#include "ConvolutionSelector.h" - -#include "absl/memory/memory.h" -#include "open_cl/kernels/ConvBuffer1x1.h" -#include "open_cl/kernels/ConvConstants.h" -#include "open_cl/kernels/ConvPowervr.h" -#include "open_cl/kernels/ConvWeightsConverter.h" -#include "open_cl/kernels/WorkGroupPicking.h" -#include "open_cl/TensorType.h" -#include "open_cl/Util.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -std::unique_ptr<GPUOperation> SelectConvolutionAdreno(const Convolution2DAttributes &attr, - const BHWC &dst_shape, - const DeviceInfo &device_info, - const OperationDef &op_def, ModelHints) -{ - if (IsConvConstantsSupported(device_info, op_def, attr)) - { - GPUOperation conv = CreateConvConstants(device_info, op_def, attr); - return absl::make_unique<GPUOperation>(std::move(conv)); - } - else - { - ConvPowerVR conv = CreateConvPowerVR(device_info, op_def, attr, &dst_shape); - return absl::make_unique<ConvPowerVR>(std::move(conv)); - } -} - -std::unique_ptr<GPUOperation> SelectConvolutionWinogradAdreno(const Convolution2DAttributes &attr, - const BHWC &dst_shape, - const DeviceInfo &device_info, - const OperationDef &op_def, - ModelHints) -{ - ConvPowerVR conv = CreateConvPowerVRWino4x4To6x6(device_info, op_def, attr, &dst_shape); - return absl::make_unique<ConvPowerVR>(std::move(conv)); -} - -std::unique_ptr<GPUOperation> -SelectConvolutionDynamicWeightsAdreno(const Convolution2DAttributes &attr, - const BHWC &weights_shape, const BHWC &dst_shape, - const DeviceInfo &device_info, const OperationDef &op_def, - ModelHints, ConvWeightsDescription *weights_desc) -{ - ConvPowerVR conv = - CreateConvPowerVRDynamicWeights(device_info, op_def, attr, weights_shape, &dst_shape); - *weights_desc = conv.GetConvWeightsDescription(); - return absl::make_unique<ConvPowerVR>(std::move(conv)); -} - -std::unique_ptr<GPUOperation> SelectConvolutionNVidia(const Convolution2DAttributes &attr, - const BHWC &dst_shape, - const DeviceInfo &device_info, - const OperationDef &op_def) -{ - if (IsConvConstantsSupported(device_info, op_def, attr)) - { - GPUOperation conv = CreateConvConstants(device_info, op_def, attr); - return absl::make_unique<GPUOperation>(std::move(conv)); - } - else - { - ConvPowerVR conv = CreateConvPowerVR(device_info, op_def, attr, &dst_shape); - return absl::make_unique<ConvPowerVR>(std::move(conv)); - } -} - -std::unique_ptr<GPUOperation> SelectConvolutionPowerVR(const Convolution2DAttributes &attr, - const DeviceInfo &device_info, - const OperationDef &op_def) -{ - ConvPowerVR conv = CreateConvPowerVR(device_info, op_def, attr); - return absl::make_unique<ConvPowerVR>(std::move(conv)); -} - -std::unique_ptr<GPUOperation> SelectConvolutionMali(const Convolution2DAttributes &attr, - const BHWC &dst_shape, - const DeviceInfo &device_info, - const OperationDef &op_def) -{ - if (op_def.src_tensors[0].storage_type == TensorStorageType::BUFFER && - IsConvBuffer1x1Supported(op_def, attr)) - { - ConvBuffer1x1 conv = CreateConvBuffer1x1(device_info, op_def, attr, &dst_shape); - return absl::make_unique<ConvBuffer1x1>(std::move(conv)); - } - else - { - ConvPowerVR conv = CreateConvPowerVR(device_info, op_def, attr, &dst_shape); - return absl::make_unique<ConvPowerVR>(std::move(conv)); - } -} - -std::unique_ptr<GPUOperation> SelectConvolutionWinogradMali(const Convolution2DAttributes &attr, - const BHWC &dst_shape, - const DeviceInfo &device_info, - const OperationDef &op_def) -{ - if (op_def.src_tensors[0].storage_type == TensorStorageType::BUFFER) - { - 
ConvBuffer1x1 conv = CreateConvBuffer1x1Wino4x4To6x6(device_info, op_def, attr, &dst_shape); - return absl::make_unique<ConvBuffer1x1>(std::move(conv)); - } - else - { - ConvPowerVR conv = CreateConvPowerVRWino4x4To6x6(device_info, op_def, attr, &dst_shape); - return absl::make_unique<ConvPowerVR>(std::move(conv)); - } -} - -std::unique_ptr<GPUOperation> -SelectConvolutionDynamicWeightsMali(const Convolution2DAttributes &attr, const BHWC &weights_shape, - const BHWC &dst_shape, const DeviceInfo &device_info, - const OperationDef &op_def, ModelHints, - ConvWeightsDescription *weights_desc) -{ - if (op_def.src_tensors[0].storage_type == TensorStorageType::BUFFER && - IsConvBuffer1x1Supported(op_def, weights_shape, attr)) - { - ConvBuffer1x1 conv = - CreateConvBuffer1x1DynamicWeights(device_info, op_def, attr, weights_shape, &dst_shape); - *weights_desc = conv.GetConvWeightsDescription(); - return absl::make_unique<ConvBuffer1x1>(std::move(conv)); - } - else - { - ConvPowerVR conv = - CreateConvPowerVRDynamicWeights(device_info, op_def, attr, weights_shape, &dst_shape); - *weights_desc = conv.GetConvWeightsDescription(); - return absl::make_unique<ConvPowerVR>(std::move(conv)); - } -} - -} // namespace - -std::unique_ptr<GPUOperation> SelectConvolution(const Convolution2DAttributes &attr, - const BHWC &dst_shape, - const DeviceInfo &device_info, - const OperationDef &op_def, ModelHints hints) -{ - if (device_info.IsAdreno()) - { - return SelectConvolutionAdreno(attr, dst_shape, device_info, op_def, hints); - } - else if (device_info.IsPowerVR() || device_info.IsAMD() || device_info.IsIntel()) - { - return SelectConvolutionPowerVR(attr, device_info, op_def); - } - else if (device_info.IsNvidia()) - { - return SelectConvolutionNVidia(attr, dst_shape, device_info, op_def); - } - else if (device_info.IsMali()) - { - return SelectConvolutionMali(attr, dst_shape, device_info, op_def); - } - else - { - return SelectConvolutionAdreno(attr, dst_shape, device_info, op_def, hints); - } -} - -std::unique_ptr<GPUOperation> SelectConvolutionForWinograd(const Convolution2DAttributes &attr, - const BHWC &dst_shape, - const DeviceInfo &device_info, - const OperationDef &op_def, - ModelHints hints) -{ - if (device_info.IsAdreno()) - { - return SelectConvolutionWinogradAdreno(attr, dst_shape, device_info, op_def, hints); - } - else if (device_info.IsPowerVR() || device_info.IsAMD() || device_info.IsNvidia() || - device_info.IsIntel()) - { - ConvPowerVR conv = CreateConvPowerVRWino4x4To6x6(device_info, op_def, attr, &dst_shape); - return absl::make_unique<ConvPowerVR>(std::move(conv)); - } - else if (device_info.IsMali()) - { - return SelectConvolutionWinogradMali(attr, dst_shape, device_info, op_def); - } - else - { - return SelectConvolutionWinogradAdreno(attr, dst_shape, device_info, op_def, hints); - } -} - -std::unique_ptr<GPUOperation> -SelectConvolutionWithDynamicWeights(const Convolution2DAttributes &attr, const BHWC &weights_shape, - const BHWC &dst_shape, const DeviceInfo &device_info, - const OperationDef &op_def, ModelHints hints, - ConvWeightsDescription *weights_desc) -{ - if (device_info.IsAdreno()) - { - return SelectConvolutionDynamicWeightsAdreno(attr, weights_shape, dst_shape, device_info, - op_def, hints, weights_desc); - } - else if (device_info.IsMali()) - { - return SelectConvolutionDynamicWeightsMali(attr, weights_shape, dst_shape, device_info, op_def, - hints, weights_desc); - } - else - { - ConvPowerVR conv = - CreateConvPowerVRDynamicWeights(device_info, op_def, attr, weights_shape, 
&dst_shape); - *weights_desc = conv.GetConvWeightsDescription(); - return absl::make_unique<ConvPowerVR>(std::move(conv)); - } -} - -std::unique_ptr<GPUOperation> -SelectConverterToConvWeights(const ConvWeightsDescription &weights_desc, const OperationDef &op_def, - ModelHints) -{ - ConverterToConvWeights converter = ConverterToConvWeights(op_def, weights_desc); - return absl::make_unique<ConverterToConvWeights>(std::move(converter)); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/selectors/ConvolutionSelector.h b/runtime/onert/backend/gpu_cl/open_cl/selectors/ConvolutionSelector.h deleted file mode 100644 index d45eea8bd..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/selectors/ConvolutionSelector.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_CONVOLUTION_SELECTOR_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_CONVOLUTION_SELECTOR_H__ - -#include <memory> - -#include "open_cl/kernels/ConvCommon.h" -#include "open_cl/kernels/GpuOperation.h" -#include "open_cl/ModelHints.h" -#include "open_cl/Operations.h" -#include "open_cl/Shape.h" -#include "open_cl/Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -std::unique_ptr<GPUOperation> SelectConvolution(const Convolution2DAttributes &attr, - const BHWC &dst_shape, - const DeviceInfo &device_info, - const OperationDef &op_def, ModelHints hints); - -std::unique_ptr<GPUOperation> SelectConvolutionForWinograd(const Convolution2DAttributes &attr, - const BHWC &dst_shape, - const DeviceInfo &device_info, - const OperationDef &op_def, - ModelHints hints); - -std::unique_ptr<GPUOperation> -SelectConvolutionWithDynamicWeights(const Convolution2DAttributes &attr, const BHWC &weights_shape, - const BHWC &dst_shape, const DeviceInfo &device_info, - const OperationDef &op_def, ModelHints hints, - ConvWeightsDescription *weights_desc); - -std::unique_ptr<GPUOperation> -SelectConverterToConvWeights(const ConvWeightsDescription &weights_desc, const OperationDef &op_def, - ModelHints hints); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_CONVOLUTION_SELECTOR_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/selectors/DwConvolutionSelector.cc b/runtime/onert/backend/gpu_cl/open_cl/selectors/DwConvolutionSelector.cc deleted file mode 100644 index f07eef689..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/selectors/DwConvolutionSelector.cc +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
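
The selector above hides all vendor dispatch behind one call: Adreno, PowerVR/AMD/Intel, NVidia, and Mali each get a specialized kernel, and unknown GPUs fall back to the Adreno path. A hedged call-site sketch (a default-constructed ModelHints is assumed valid here):

    #include "open_cl/selectors/ConvolutionSelector.h"

    using namespace onert::backend::gpu_cl;

    std::unique_ptr<GPUOperation> MakeConv(const Convolution2DAttributes &attr,
                                           const BHWC &dst_shape, const DeviceInfo &device_info,
                                           const OperationDef &op_def)
    {
      // Returns a ready-to-compile GPUOperation specialized for the device.
      return SelectConvolution(attr, dst_shape, device_info, op_def, ModelHints());
    }
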
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "DwConvolutionSelector.h" - -#include "absl/memory/memory.h" -#include "open_cl/ClDevice.h" -#include "open_cl/kernels/DepthwiseConv.h" -#include "open_cl/kernels/DepthwiseConv3x3.h" -#include "open_cl/Precision.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ -namespace -{ - -std::unique_ptr<GPUOperation> -SelectDWConvolutionAdreno(const DepthwiseConvolution2DAttributes &attr, - const DeviceInfo &device_info, const OperationDef &op_def) -{ - if (IsDepthwiseConv3x3Supported(attr)) - { - return absl::make_unique<DepthwiseConv3x3>(CreateDepthwiseConv3x3(device_info, op_def, attr)); - } - else - { - return absl::make_unique<GPUOperation>(CreateDepthwiseConvolution2D(device_info, op_def, attr)); - } -} - -std::unique_ptr<GPUOperation> -SelectDWConvolutionPowerVR(const DepthwiseConvolution2DAttributes &attr, - const DeviceInfo &device_info, const OperationDef &op_def) -{ - if (IsDepthwiseConv3x3Supported(attr)) - { - return absl::make_unique<DepthwiseConv3x3>(CreateDepthwiseConv3x3(device_info, op_def, attr)); - } - else - { - return absl::make_unique<GPUOperation>(CreateDepthwiseConvolution2D(device_info, op_def, attr)); - } -} - -std::unique_ptr<GPUOperation> SelectDWConvolutionMali(const DepthwiseConvolution2DAttributes &attr, - const DeviceInfo &device_info, - const OperationDef &op_def) -{ - const auto storage_type = op_def.src_tensors[0].storage_type; - bool buffer_type = - storage_type == TensorStorageType::BUFFER || storage_type == TensorStorageType::IMAGE_BUFFER; - const MaliInfo mali_info = device_info.mali_info; - if (IsDepthwiseConv3x3Supported(attr) && !mali_info.IsMidgard() && !buffer_type && - op_def.precision != CalculationsPrecision::F32) - { - return absl::make_unique<DepthwiseConv3x3>(CreateDepthwiseConv3x3(device_info, op_def, attr)); - } - else - { - return absl::make_unique<GPUOperation>(CreateDepthwiseConvolution2D(device_info, op_def, attr)); - } -} -} // namespace - -std::unique_ptr<GPUOperation> SelectDWConvolution(const DepthwiseConvolution2DAttributes &attr, - const DeviceInfo &device_info, - const OperationDef &op_def) -{ - if (device_info.IsAdreno()) - { - return SelectDWConvolutionAdreno(attr, device_info, op_def); - } - else if (device_info.IsPowerVR()) - { - return SelectDWConvolutionPowerVR(attr, device_info, op_def); - } - else if (device_info.IsMali()) - { - return SelectDWConvolutionMali(attr, device_info, op_def); - } - else - { - return SelectDWConvolutionAdreno(attr, device_info, op_def); - } -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/selectors/DwConvolutionSelector.h b/runtime/onert/backend/gpu_cl/open_cl/selectors/DwConvolutionSelector.h deleted file mode 100644 index 2fa40c5c3..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/selectors/DwConvolutionSelector.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. 
All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_DW_CONVOLUTION_SELECTOR_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_DW_CONVOLUTION_SELECTOR_H__ - -#include <memory> - -#include "open_cl/kernels/GpuOperation.h" -#include "open_cl/Operations.h" -#include "open_cl/Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -std::unique_ptr<GPUOperation> SelectDWConvolution(const DepthwiseConvolution2DAttributes &attr, - const DeviceInfo &device_info, - const OperationDef &op_def); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_DW_CONVOLUTION_SELECTOR_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/selectors/SimpleSelectors.cc b/runtime/onert/backend/gpu_cl/open_cl/selectors/SimpleSelectors.cc deleted file mode 100644 index ac514b26c..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/selectors/SimpleSelectors.cc +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
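
The depthwise selector just removed gates its fast 3x3 kernel on the device: on Mali it additionally requires a post-Midgard GPU, a non-buffer source tensor, and sub-F32 precision. A hedged call-site sketch (assumes the deleted gpu_cl headers):

    #include "open_cl/selectors/DwConvolutionSelector.h"

    using namespace onert::backend::gpu_cl;

    std::unique_ptr<GPUOperation> MakeDwConv(const DepthwiseConvolution2DAttributes &attr,
                                             const DeviceInfo &device_info,
                                             const OperationDef &op_def)
    {
      // Falls back to the generic DepthwiseConv kernel whenever the
      // specialized 3x3 path is not applicable on this device.
      return SelectDWConvolution(attr, device_info, op_def);
    }
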
- */ - -#include "SimpleSelectors.h" - -#include <memory> -#include <set> - -#include "open_cl/kernels/Add.h" -#include "open_cl/kernels/DepthwiseConv.h" -#include "open_cl/kernels/Pooling.h" -#include "open_cl/kernels/Relu.h" -#include "open_cl/kernels/Reshape.h" -#include "open_cl/kernels/Reshapex4.h" -#include "open_cl/kernels/Softmax.h" -#include "open_cl/kernels/Softmax1x1.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -void SelectAdd(const OperationDef &op_def, const std::vector<int> &channels, int dst_channels, - std::unique_ptr<GPUOperation> *ptr) -{ - GPUOperation operation = CreateAdd(op_def, channels, dst_channels); - *ptr = std::make_unique<GPUOperation>(std::move(operation)); -} - -std::unique_ptr<GPUOperation> -SelectDWConvolutionDynamicWeights(const DepthwiseConvolution2DAttributes &attr, - const DeviceInfo &device_info, const OperationDef &op_def) -{ - return absl::make_unique<GPUOperation>( - CreateDepthwiseConvolution2DDynamicWeights(device_info, op_def, attr)); -} - -std::unique_ptr<GPUOperation> SelectPooling(const Pooling2DAttributes &attr, - const OperationDef &op_def) -{ - GPUOperation operation = CreatePooling(op_def, attr); - return absl::make_unique<GPUOperation>(std::move(operation)); -} - -std::unique_ptr<GPUOperation> SelectReLU(const ReLUAttributes &attr, const OperationDef &op_def) -{ - return absl::make_unique<GPUOperation>(CreateReLU(op_def, attr)); -} - -void SelectReshape(int src_channels, int dst_channels, const OperationDef &op_def, - std::unique_ptr<GPUOperation> *ptr) -{ - if (src_channels % 4 == 0 && dst_channels % 4 == 0) - { - GPUOperation operation = CreateReshapex4(op_def); - *ptr = std::make_unique<GPUOperation>(std::move(operation)); - } - else - { - GPUOperation operation = CreateReshape(op_def); - *ptr = std::make_unique<GPUOperation>(std::move(operation)); - } -} - -void SelectSoftmax(const BHWC &shape, const OperationDef &op_def, - std::unique_ptr<GPUOperation> *ptr) -{ - if (shape.w == 1 && shape.h == 1) - { - Softmax1x1 operation = CreateSoftmax1x1(op_def); - *ptr = absl::make_unique<Softmax1x1>(std::move(operation)); - } - else - { - GPUOperation operation = CreateSoftmax(op_def); - *ptr = absl::make_unique<GPUOperation>(std::move(operation)); - } -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert diff --git a/runtime/onert/backend/gpu_cl/open_cl/selectors/SimpleSelectors.h b/runtime/onert/backend/gpu_cl/open_cl/selectors/SimpleSelectors.h deleted file mode 100644 index 2c5837a1d..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/selectors/SimpleSelectors.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
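
SelectSoftmax and SelectReshape above pick vectorized kernels only when the shape allows it: Softmax1x1 needs a 1x1 spatial shape, Reshapex4 needs both channel counts divisible by four. A hedged call-site sketch (shape and channel values are illustrative):

    #include <memory>
    #include "open_cl/selectors/SimpleSelectors.h"

    using namespace onert::backend::gpu_cl;

    void BuildKernels(const OperationDef &op_def)
    {
      std::unique_ptr<GPUOperation> softmax;
      SelectSoftmax(BHWC(1, 1, 1, 1024), op_def, &softmax); // h == w == 1 -> Softmax1x1

      std::unique_ptr<GPUOperation> reshape;
      SelectReshape(64, 128, op_def, &reshape); // both % 4 == 0 -> Reshapex4
    }
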
- */ - -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_SIMPLE_SELECTORS_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_SIMPLE_SELECTORS_H__ - -#include <memory> - -#include "open_cl/ClDevice.h" -#include "open_cl/kernels/GpuOperation.h" -#include "open_cl/Operations.h" -#include "open_cl/Shape.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -void SelectAdd(const OperationDef &op_def, const std::vector<int> &channels, int dst_channels, - std::unique_ptr<GPUOperation> *ptr); - -std::unique_ptr<GPUOperation> -SelectDWConvolutionDynamicWeights(const DepthwiseConvolution2DAttributes &attr, - const DeviceInfo &device_info, const OperationDef &op_def); - -std::unique_ptr<GPUOperation> SelectPooling(const Pooling2DAttributes &attr, - const OperationDef &op_def); - -std::unique_ptr<GPUOperation> SelectReLU(const ReLUAttributes &attr, const OperationDef &op_def); - -void SelectReshape(int src_channels, int dst_channels, const OperationDef &op_def, - std::unique_ptr<GPUOperation> *ptr); - -void SelectSoftmax(const BHWC &shape, const OperationDef &op_def, - std::unique_ptr<GPUOperation> *ptr); - -} // namespace gpu_cl -} // namespace backend -} // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_SELECTORS_SIMPLE_SELECTORS_H__ diff --git a/runtime/onert/backend/gpu_cl/operand/CLTensor.cc b/runtime/onert/backend/gpu_cl/operand/CLTensor.cc index 6dd9bd252..d3ed102a1 100644 --- a/runtime/onert/backend/gpu_cl/operand/CLTensor.cc +++ b/runtime/onert/backend/gpu_cl/operand/CLTensor.cc @@ -16,10 +16,12 @@ #include "CLTensor.h" -#include "open_cl/Buffer.h" -#include "open_cl/ClContext.h" -#include "open_cl/Tensor.h" -#include "open_cl/TensorType.h" +#include "tensorflow/lite/delegates/gpu/cl/buffer.h" +#include "tensorflow/lite/delegates/gpu/cl/cl_context.h" +#include "tensorflow/lite/delegates/gpu/cl/tensor.h" +#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h" + +using namespace tflite::gpu::cl; namespace onert { @@ -30,16 +32,15 @@ namespace gpu_cl namespace operand { -CLTensor::CLTensor(size_t rank, ir::Shape shape, std::shared_ptr<Environment> environment) - : ICLTensor{rank, shape, environment}, _tensor(std::make_shared<Tensor>()) +CLTensor::CLTensor(size_t rank, ir::Shape shape, + std::shared_ptr<tflite::gpu::cl::Environment> environment, TensorType type) + : ICLTensor{rank, shape, environment, type}, _tensor(std::make_shared<Tensor>()) { } -const Tensor *CLTensor::handle() const { return _tensor.get(); } - -Tensor *CLTensor::handle() { return _tensor.get(); } +const tflite::gpu::cl::Tensor *CLTensor::handle() const { return _tensor.get(); } -void CLTensor::setBuffer(void *host_ptr) { (void)host_ptr; } +tflite::gpu::cl::Tensor *CLTensor::handle() { return _tensor.get(); } } // namespace operand } // namespace gpu_cl diff --git a/runtime/onert/backend/gpu_cl/operand/CLTensor.h b/runtime/onert/backend/gpu_cl/operand/CLTensor.h index 7d2e70a99..f2153f430 100644 --- a/runtime/onert/backend/gpu_cl/operand/CLTensor.h +++ b/runtime/onert/backend/gpu_cl/operand/CLTensor.h @@ -19,9 +19,9 @@ #include "ICLTensor.h" -#include "open_cl/Buffer.h" -#include "open_cl/ClContext.h" -#include "open_cl/Tensor.h" +#include "tensorflow/lite/delegates/gpu/cl/buffer.h" +#include "tensorflow/lite/delegates/gpu/cl/cl_context.h" +#include "tensorflow/lite/delegates/gpu/cl/tensor.h" namespace onert { @@ -38,11 +38,12 @@ public: CLTensor() = delete; public: - CLTensor(size_t rank, ir::Shape shape, std::shared_ptr<Environment> environment); + CLTensor(size_t rank, ir::Shape shape, 
std::shared_ptr<tflite::gpu::cl::Environment> environment, + TensorType type); public: - const Tensor *handle() const override; - Tensor *handle() override; + const tflite::gpu::cl::Tensor *handle() const override; + tflite::gpu::cl::Tensor *handle() override; public: /** Set given buffer as the buffer of the tensor @@ -55,7 +56,7 @@ public: void setBuffer(void *host_ptr); private: - std::shared_ptr<Tensor> _tensor; + std::shared_ptr<tflite::gpu::cl::Tensor> _tensor; }; } // namespace operand diff --git a/runtime/onert/backend/gpu_cl/operand/ICLTensor.cc b/runtime/onert/backend/gpu_cl/operand/ICLTensor.cc index 3f070be0c..a95f78056 100644 --- a/runtime/onert/backend/gpu_cl/operand/ICLTensor.cc +++ b/runtime/onert/backend/gpu_cl/operand/ICLTensor.cc @@ -16,11 +16,11 @@ #include "ICLTensor.h" -#include "open_cl/Api.h" -#include "open_cl/Spi.h" -#include "open_cl/OpenclWrapper.h" -#include "open_cl/TensorTypeUtil.h" -#include "open_cl/kernels/Converter.h" +#include "tensorflow/lite/delegates/gpu/api.h" +#include "tensorflow/lite/delegates/gpu/spi.h" +#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h" +#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h" +#include "tensorflow/lite/delegates/gpu/cl/kernels/converter.h" namespace onert { @@ -31,6 +31,10 @@ namespace gpu_cl namespace operand { +using namespace tflite::gpu; +using namespace tflite::gpu::cl; +using namespace tflite::gpu::internal_tensor; + void ICLTensor::access(const std::function<void(ITensor &tensor)> &fn) { if (total_size() == 0) @@ -39,100 +43,133 @@ void ICLTensor::access(const std::function<void(ITensor &tensor)> &fn) fn(*this); } -void ICLTensor::enqueueWriteBuffer(const void *ptr, bool) +void ICLTensor::writeConvertInit() { - const float *arr = (float *)ptr; - TensorObject input_obj = MakeReadableCpuMemory(absl::MakeSpan(arr, total_size() / 4)); + TensorObjectDef input_def; + input_def.dimensions.b = handle()->Batch(); + input_def.dimensions.h = handle()->Height(); + input_def.dimensions.w = handle()->Width(); + input_def.dimensions.c = handle()->Channels(); + input_def.object_def.data_layout = DataLayout::BHWC; + input_def.object_def.data_type = DataType::FLOAT32; + input_def.object_def.object_type = ObjectType::CPU_MEMORY; + input_def.object_def.user_provided = true; - TensorObject output_obj; + TensorObjectDef permute_def = input_def; + permute_def.object_def.object_type = ToObjectType(handle()->GetStorageType()); - if (handle()->GetStorageType() == TensorStorageType::BUFFER) + auto dims = permute_def.dimensions; + const BHWC shape(dims.b, dims.h, dims.w, dims.c); + const TensorDescriptor desc{ + permute_def.object_def.data_type, + ToTensorStorageType(permute_def.object_def.object_type, permute_def.object_def.data_layout), + Layout::BHWC}; + if (!AllocateTensorMemory(_environment->context(), shape, desc, &_cl_memory).ok()) { - output_obj = OpenClBuffer{handle()->GetMemoryPtr()}; + throw std::runtime_error("Failed to AllocateTensorMemory"); } - else if (handle()->GetStorageType() == TensorStorageType::IMAGE_BUFFER) + + TensorObjectDef output_def = permute_def; + output_def.object_def.data_layout = ToDataLayout(handle()->GetStorageType()); + output_def.object_def.data_type = handle()->GetDataType(); + input_def.object_def.user_provided = false; + + _converter_builder = NewConverterBuilder(_environment.get()); + if (!_converter_builder->MakeConverter(input_def, permute_def, &_converter_to).ok()) { - output_obj = OpenClBuffer{handle()->GetMemoryPtrForWriting()}; + throw std::runtime_error("Failed to make 
converter_to"); } - else + if (!_converter_builder->MakeConverter(permute_def, output_def, &_converter_from).ok()) { - output_obj = OpenClTexture{handle()->GetMemoryPtr()}; + throw std::runtime_error("Failed to make converter_from"); } +} + +void ICLTensor::readConvertInit() +{ + _converter_builder = NewConverterBuilder(_environment.get()); TensorObjectDef input_def; input_def.dimensions.b = handle()->Batch(); input_def.dimensions.h = handle()->Height(); input_def.dimensions.w = handle()->Width(); input_def.dimensions.c = handle()->Channels(); - input_def.object_def.data_layout = DataLayout::BHWC; - input_def.object_def.data_type = DataType::FLOAT32; - input_def.object_def.object_type = ObjectType::CPU_MEMORY; - input_def.object_def.user_provided = true; + input_def.object_def.data_layout = ToDataLayout(handle()->GetStorageType()); + input_def.object_def.data_type = handle()->GetDataType(); + input_def.object_def.object_type = ToObjectType(handle()->GetStorageType()); + input_def.object_def.user_provided = false; - TensorObjectDef tmp_def; - tmp_def.dimensions.b = handle()->Batch(); - tmp_def.dimensions.h = handle()->Height(); - tmp_def.dimensions.w = handle()->Width(); - tmp_def.dimensions.c = handle()->Channels(); - tmp_def.object_def.data_layout = DataLayout::BHWC; - tmp_def.object_def.data_type = DataType::FLOAT32; - tmp_def.object_def.object_type = ToObjectType(handle()->GetStorageType()); - tmp_def.object_def.user_provided = true; - - auto dims = tmp_def.dimensions; + TensorObjectDef permute_def = input_def; + permute_def.object_def.data_layout = DataLayout::BHWC; + permute_def.object_def.data_type = DataType::FLOAT32; + permute_def.object_def.user_provided = true; + + auto dims = permute_def.dimensions; const BHWC shape(dims.b, dims.h, dims.w, dims.c); const TensorDescriptor desc{ - tmp_def.object_def.data_type, - ToTensorStorageType(tmp_def.object_def.object_type, tmp_def.object_def.data_layout), + permute_def.object_def.data_type, + ToTensorStorageType(permute_def.object_def.object_type, permute_def.object_def.data_layout), Layout::BHWC}; if (!AllocateTensorMemory(_environment->context(), shape, desc, &_cl_memory).ok()) { - throw std::runtime_error("AllocateTensorMemory error."); + throw std::runtime_error("Failed to AllocateTensorMemory"); } - TensorObject tmp_obj; - if (tmp_def.object_def.object_type == ObjectType::OPENCL_TEXTURE) + + TensorObjectDef output_def = permute_def; + output_def.object_def.object_type = ObjectType::CPU_MEMORY; + + if (!_converter_builder->MakeConverter(input_def, permute_def, &_converter_from).ok()) { - tmp_obj = OpenClTexture{_cl_memory.memory()}; + throw std::runtime_error("Failed to make converter_from"); } - else + if (!_converter_builder->MakeConverter(permute_def, output_def, &_converter_to).ok()) { - tmp_obj = OpenClBuffer{_cl_memory.memory()}; + throw std::runtime_error("Failed to make converter_to"); } +} - TensorObjectDef output_def = input_def; - output_def.dimensions.b = handle()->Batch(); - output_def.dimensions.h = handle()->Height(); - output_def.dimensions.w = handle()->Width(); - output_def.dimensions.c = handle()->Channels(); - output_def.object_def.data_layout = ToDataLayout(handle()->GetStorageType()); - output_def.object_def.data_type = handle()->GetDataType(); - output_def.object_def.object_type = ToObjectType(handle()->GetStorageType()); +void ICLTensor::enqueueWriteBuffer(const void *ptr, bool) +{ + TensorObject input_obj = + MakeReadableCpuMemory(absl::MakeSpan(static_cast<const float *>(ptr), _shape.num_elements())); - 
_converter_builder = NewConverterBuilder(_environment.get()); - if (!_converter_builder->MakeConverter(input_def, tmp_def, &_converter_cpu).ok()) + TensorObject output_obj; + + TensorObject permute_obj; + if (ToObjectType(handle()->GetStorageType()) == ObjectType::OPENCL_TEXTURE) { - throw std::runtime_error("MakeConverter<_converter_cpu> error."); + permute_obj = OpenClTexture{_cl_memory.memory()}; } - if (!_converter_builder->MakeConverter(tmp_def, output_def, &_converter_bhwc).ok()) + else { - throw std::runtime_error("MakeConverter<_converter_bhwc> error."); + permute_obj = OpenClBuffer{_cl_memory.memory()}; } - if (!_converter_cpu->Convert(input_obj, tmp_obj).ok()) + if (handle()->GetStorageType() == TensorStorageType::BUFFER) { - throw std::runtime_error("[w] _converter_cpu Convert error."); + output_obj = OpenClBuffer{handle()->GetMemoryPtr()}; } - if (!_converter_bhwc->Convert(tmp_obj, output_obj).ok()) + else if (handle()->GetStorageType() == TensorStorageType::IMAGE_BUFFER) { - throw std::runtime_error("[w] _converter_bhwc Convert error."); + output_obj = OpenClBuffer{handle()->GetMemoryPtrForWriting()}; + } + else + { + output_obj = OpenClTexture{handle()->GetMemoryPtr()}; + } + + if (!_converter_to->Convert(input_obj, permute_obj).ok()) + { + throw std::runtime_error("Failed to write cl buffer from cpu memory"); + } + if (!_converter_from->Convert(permute_obj, output_obj).ok()) + { + throw std::runtime_error("Failed to change layout"); } } void ICLTensor::enqueueReadBuffer(void *ptr, bool) { - float *arr = (float *)ptr; - TensorObject output_obj = MakeCpuMemory(absl::MakeSpan(arr, total_size() / 4)); - TensorObject input_obj; if (handle()->GetStorageType() == TensorStorageType::BUFFER) @@ -148,72 +185,26 @@ void ICLTensor::enqueueReadBuffer(void *ptr, bool) input_obj = OpenClTexture{handle()->GetMemoryPtr()}; } - TensorObjectDef input_def; - input_def.dimensions.b = handle()->Batch(); - input_def.dimensions.h = handle()->Height(); - input_def.dimensions.w = handle()->Width(); - input_def.dimensions.c = handle()->Channels(); - input_def.object_def.data_layout = ToDataLayout(handle()->GetStorageType()); - input_def.object_def.data_type = handle()->GetDataType(); - input_def.object_def.object_type = ToObjectType(handle()->GetStorageType()); - input_def.object_def.user_provided = false; - - TensorObjectDef tmp_def; - tmp_def.dimensions.b = handle()->Batch(); - tmp_def.dimensions.h = handle()->Height(); - tmp_def.dimensions.w = handle()->Width(); - tmp_def.dimensions.c = handle()->Channels(); - tmp_def.object_def.data_layout = DataLayout::BHWC; - tmp_def.object_def.data_type = DataType::FLOAT32; - tmp_def.object_def.object_type = ToObjectType(handle()->GetStorageType()); - tmp_def.object_def.user_provided = true; - - auto dims = tmp_def.dimensions; - const BHWC shape(dims.b, dims.h, dims.w, dims.c); - const TensorDescriptor desc{ - tmp_def.object_def.data_type, - ToTensorStorageType(tmp_def.object_def.object_type, tmp_def.object_def.data_layout), - Layout::BHWC}; - if (!AllocateTensorMemory(_environment->context(), shape, desc, &_cl_memory).ok()) + TensorObject permute_obj; + if (ToObjectType(handle()->GetStorageType()) == ObjectType::OPENCL_TEXTURE) { - throw std::runtime_error("AllocateTensorMemory error."); - } - TensorObject tmp_obj; - if (tmp_def.object_def.object_type == ObjectType::OPENCL_TEXTURE) - { - tmp_obj = OpenClTexture{_cl_memory.memory()}; + permute_obj = OpenClTexture{_cl_memory.memory()}; } else { - tmp_obj = OpenClBuffer{_cl_memory.memory()}; + permute_obj = 
OpenClBuffer{_cl_memory.memory()}; } - TensorObjectDef output_def = input_def; - output_def.dimensions.b = handle()->Batch(); - output_def.dimensions.h = handle()->Height(); - output_def.dimensions.w = handle()->Width(); - output_def.dimensions.c = handle()->Channels(); - output_def.object_def.data_layout = DataLayout::BHWC; - output_def.object_def.data_type = DataType::FLOAT32; - output_def.object_def.object_type = ObjectType::CPU_MEMORY; - output_def.object_def.user_provided = true; - _converter_builder = NewConverterBuilder(_environment.get()); - if (!_converter_builder->MakeConverter(input_def, tmp_def, &_converter_bhwc).ok()) - { - throw std::runtime_error("MakeConverter<_converter_bhwc> error."); - } - if (!_converter_builder->MakeConverter(tmp_def, output_def, &_converter_cpu).ok()) - { - throw std::runtime_error("MakeConverter<_converter_cpu> error."); - } + TensorObject output_obj = + MakeCpuMemory(absl::MakeSpan(static_cast<float *>(ptr), _shape.num_elements())); - if (!_converter_bhwc->Convert(input_obj, tmp_obj).ok()) + if (!_converter_from->Convert(input_obj, permute_obj).ok()) { - throw std::runtime_error("[r] _converter_bhwc Convert error."); + throw std::runtime_error("Failed to change layout"); } - if (!_converter_cpu->Convert(tmp_obj, output_obj).ok()) + if (!_converter_to->Convert(permute_obj, output_obj).ok()) { - throw std::runtime_error("[r] _converter_cpu Convert error."); + throw std::runtime_error("Failed to read cl buffer"); } } diff --git a/runtime/onert/backend/gpu_cl/operand/ICLTensor.h b/runtime/onert/backend/gpu_cl/operand/ICLTensor.h index 28e905d48..b8ad4469f 100644 --- a/runtime/onert/backend/gpu_cl/operand/ICLTensor.h +++ b/runtime/onert/backend/gpu_cl/operand/ICLTensor.h @@ -19,11 +19,14 @@ #include <backend/ITensor.h> -#include "open_cl/Api.h" -#include "open_cl/Spi.h" -#include "open_cl/ClCommandQueue.h" -#include "open_cl/kernels/Converter.h" -#include "open_cl/Tensor.h" +#include "tensorflow/lite/delegates/gpu/api.h" +#include "tensorflow/lite/delegates/gpu/spi.h" +#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h" +#include "tensorflow/lite/delegates/gpu/cl/kernels/converter.h" +#include "tensorflow/lite/delegates/gpu/cl/tensor.h" +#include "tensorflow/lite/delegates/gpu/cl/environment.h" + +#include "TensorBuilderHelper.h" namespace onert { @@ -43,19 +46,18 @@ public: ICLTensor(ICLTensor &&) = default; ICLTensor &operator=(ICLTensor &&) = default; - ICLTensor(size_t rank, ir::Shape shape, std::shared_ptr<Environment> environment) - : _rank{rank}, _shape{shape}, _environment(environment) + ICLTensor(size_t rank, ir::Shape shape, std::shared_ptr<tflite::gpu::cl::Environment> environment, + TensorType type) + : _rank{rank}, _shape{shape}, _environment(environment), _type(type) { } public: uint8_t *buffer() const final { return reinterpret_cast<uint8_t *>(handle()->GetMemoryPtr()); } size_t total_size() const final { return _shape.num_elements() * sizeof(float); } - size_t calcOffset(const ir::Coordinates &coords) const final + size_t calcOffset(const ir::Coordinates &) const final { - // NYI - (void)coords; - return 0; + throw std::runtime_error("ICLTensor::calcOffset() is not supported."); } ir::Layout layout() const final { return ir::Layout::NHWC; } ir::DataType data_type() const final { return ir::DataType::FLOAT32; } @@ -83,19 +85,24 @@ public: void enqueueWriteBuffer(const void *ptr, bool blocking = true) final; void enqueueReadBuffer(void *ptr, bool blocking = true) final; + void writeConvertInit(); + void readConvertInit(); + 
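
The two init methods declared here build the converter pair that the enqueue calls consume: writes go CPU memory -> BHWC staging buffer -> device storage layout, and reads run the same chain in reverse. A hedged caller-side sketch (the tensor and host pointers are hypothetical; the device storage must already be allocated):

    void RoundTrip(onert::backend::gpu_cl::operand::ICLTensor &t,
                   const float *host_in, float *host_out)
    {
      t.writeConvertInit();          // builds _converter_to / _converter_from
      t.enqueueWriteBuffer(host_in); // CPU -> BHWC staging -> device layout

      t.readConvertInit();
      t.enqueueReadBuffer(host_out); // device layout -> BHWC staging -> CPU
    }
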
TensorType get_type() { return _type; } + public: - virtual const Tensor *handle() const = 0; - virtual Tensor *handle() = 0; + virtual const tflite::gpu::cl::Tensor *handle() const = 0; + virtual tflite::gpu::cl::Tensor *handle() = 0; private: protected: size_t _rank; // Actual rank (reflects extended rank) ir::Shape _shape; - std::shared_ptr<Environment> _environment; - std::unique_ptr<TensorObjectConverterBuilder> _converter_builder; - CLMemory _cl_memory; - std::unique_ptr<TensorObjectConverter> _converter_cpu; - std::unique_ptr<TensorObjectConverter> _converter_bhwc; + std::shared_ptr<tflite::gpu::cl::Environment> _environment; + TensorType _type; + std::unique_ptr<tflite::gpu::TensorObjectConverterBuilder> _converter_builder; + tflite::gpu::cl::CLMemory _cl_memory; + std::unique_ptr<tflite::gpu::TensorObjectConverter> _converter_to; + std::unique_ptr<tflite::gpu::TensorObjectConverter> _converter_from; }; } // namespace operand diff --git a/runtime/onert/backend/ruy/ops/OperationUtils.h b/runtime/onert/backend/ruy/ops/OperationUtils.h index 5dfdc7ec5..716400c1f 100644 --- a/runtime/onert/backend/ruy/ops/OperationUtils.h +++ b/runtime/onert/backend/ruy/ops/OperationUtils.h @@ -18,17 +18,17 @@ #define __ONERT_BACKEND_RUY_OPS_OPERATION_UTILS_H__ #include <backend/IPortableTensor.h> +#include <ir/DataType.h> +#include <ir/Padding.h> +#include <util/CalculateActivationRange.h> #include <ruy/Shape.h> #include <ruy/Types.h> -#include <iostream> -#include <ir/DataType.h> -#include <ir/InternalType.h> -#include <ir/Padding.h> #include <limits> using OperandType = onert::ir::DataType; +using namespace onert::util; namespace onert { @@ -79,40 +79,6 @@ inline nnfw::ruy::FusedActivationFunctionType convertActivationType(const ir::Ac } } -template <typename T> -void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max) -{ - if (activation == ir::Activation::RELU) - { - *activation_min = 0; - *activation_max = std::numeric_limits<T>::max(); - } - else if (activation == ir::Activation::RELU6) - { - *activation_min = 0; - *activation_max = 6; - } - else if (activation == ir::Activation::RELU1) - { - *activation_min = -1; - *activation_max = 1; - } - else if (activation == ir::Activation::SIGMOID) - { - *activation_min = 0; - *activation_max = 1; - } - else if (activation == ir::Activation::NONE) - { - *activation_min = std::numeric_limits<T>::lowest(); - *activation_max = std::numeric_limits<T>::max(); - } - else - { - std::cout << "Unsupported fused activation function." << std::endl; - } -} - nnfw::ruy::PaddingType getPaddingType(ir::PaddingType ir_padding_type); } // namespace ops diff --git a/runtime/onert/backend/trix/Backend.h b/runtime/onert/backend/trix/Backend.h new file mode 100644 index 000000000..a63839720 --- /dev/null +++ b/runtime/onert/backend/trix/Backend.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
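
With the local template removed, ruy now shares CalculateActivationRange from util. A hedged sketch of the helper's contract, assuming the relocated function keeps the signature the deleted copy had:

    #include <util/CalculateActivationRange.h>
    #include <ir/InternalType.h>

    void ActivationExample()
    {
      float lo = 0.0f, hi = 0.0f;
      // RELU6 clamps to [0, 6]; NONE would yield the full numeric range.
      onert::util::CalculateActivationRange(onert::ir::Activation::RELU6, &lo, &hi);
      // lo == 0.0f, hi == 6.0f
    }
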
+ */ + +#ifndef __ONERT_BACKEND_TRIX_BACKEND_H__ +#define __ONERT_BACKEND_TRIX_BACKEND_H__ + +#include "BackendContext.h" +#include "Config.h" +#include "KernelGenerator.h" + +#include <backend/Backend.h> + +#include <memory> + +namespace onert +{ +namespace backend +{ +namespace trix +{ + +class Backend : public ::onert::backend::Backend +{ +public: + Backend() : _config{std::make_shared<Config>()} {} + + std::shared_ptr<IConfig> config() const override { return _config; } + + std::unique_ptr<onert::backend::BackendContext> newContext(ContextData &&data) const override + { + auto &graph = *data.graph; + auto context = std::make_unique<BackendContext>(this, std::move(data)); + auto tr = std::make_shared<basic::TensorRegistry>(); + auto tb = std::make_shared<TensorBuilder>(tr); + context->tensor_registry = tr; + context->tensor_builder = tb; + context->kernel_gen = std::make_shared<KernelGenerator>(graph, tb, tr, context->dev_context()); + return context; + } + +private: + std::shared_ptr<IConfig> _config; +}; + +} // namespace trix +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_TRIX_BACKEND_H__ diff --git a/runtime/onert/backend/trix/BackendContext.cc b/runtime/onert/backend/trix/BackendContext.cc new file mode 100644 index 000000000..e46b11d20 --- /dev/null +++ b/runtime/onert/backend/trix/BackendContext.cc @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "BackendContext.h" + +#include "TensorBuilder.h" +#include "KernelGenerator.h" +#include "util/logging.h" +#include "ir/Index.h" +#include "ir/OperandIndexMap.h" +#include "ir/OperandIndexSequence.h" +#include "backend/basic/BackendContextHelpers.h" + +namespace onert +{ +namespace backend +{ +namespace trix +{ + +ITensorRegistry *BackendContext::genTensors() { return basic::genTensors(*this); } + +FunctionMap BackendContext::genKernels() +{ + FunctionMap ret; + + for (auto op_ind : _data.op_order) + { + auto fn_seq = kernel_gen->generate(op_ind); + ret.emplace_back(op_ind, std::move(fn_seq)); + } + + basic::initConsts(*this); + + // NOTE For memory optimization, we want to free some operand data + const_cast<ir::Graph &>(*_data.graph) + .operands() + .iterate([&](const ir::OperandIndex &, ir::Operand &obj) { obj.releaseData(); }); + + for (auto &it : ret) + { + auto &fn_seq = it.second; + fn_seq->iterate([&](exec::IFunction &ifunc) { ifunc.prepare(); }); + } + + return ret; +} + +} // namespace trix +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/trix/BackendContext.h b/runtime/onert/backend/trix/BackendContext.h new file mode 100644 index 000000000..c0734c46d --- /dev/null +++ b/runtime/onert/backend/trix/BackendContext.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_TRIX_BACKEND_CONTEXT_H__ +#define __ONERT_BACKEND_TRIX_BACKEND_CONTEXT_H__ + +#include <backend/BackendContext.h> +#include "TensorBuilder.h" +#include "KernelGenerator.h" +#include "DevContext.h" + +namespace onert +{ +namespace backend +{ +namespace trix +{ + +class BackendContext : public onert::backend::BackendContext +{ +public: + BackendContext(const Backend *backend, ContextData &&data, + std::shared_ptr<ITensorRegistry> tensor_registry = nullptr, + std::shared_ptr<TensorBuilder> tensor_builder = nullptr, + std::shared_ptr<KernelGenerator> kernel_gen = nullptr) + : onert::backend::BackendContext(backend, std::move(data), tensor_registry), + tensor_builder{tensor_builder}, kernel_gen{kernel_gen}, _dev_context(new DevContext) + { + } + + ITensorRegistry *genTensors() override; + FunctionMap genKernels() override; + + std::shared_ptr<DevContext> dev_context() { return _dev_context; } + +public: + // TODO Make it private + std::shared_ptr<TensorBuilder> tensor_builder; + std::shared_ptr<KernelGenerator> kernel_gen; + +private: + std::shared_ptr<DevContext> _dev_context; +}; + +} // namespace trix +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_TRIX_BACKEND_CONTEXT_H__ diff --git a/runtime/onert/backend/trix/CMakeLists.txt b/runtime/onert/backend/trix/CMakeLists.txt new file mode 100644 index 000000000..5455757ca --- /dev/null +++ b/runtime/onert/backend/trix/CMakeLists.txt @@ -0,0 +1,24 @@ +set(LIB_ONERT_BACKEND_TRIX onert_backend_trix) + +nnfw_find_package(TRIXEngine EXACT 2.5.0 QUIET) +if(NOT TRIXEngine_FOUND) + return() +endif(NOT TRIXEngine_FOUND) + +file(GLOB_RECURSE SOURCES "*.cc") + +add_library(${LIB_ONERT_BACKEND_TRIX} SHARED ${SOURCES}) + +target_link_libraries(${LIB_ONERT_BACKEND_TRIX} PRIVATE onert_core) +target_link_libraries(${LIB_ONERT_BACKEND_TRIX} PRIVATE trix_engine) +target_link_libraries(${LIB_ONERT_BACKEND_TRIX} PRIVATE nnfw_common) +target_link_libraries(${LIB_ONERT_BACKEND_TRIX} PRIVATE nnfw_coverage) + +set_target_properties(${LIB_ONERT_BACKEND_TRIX} PROPERTIES OUTPUT_NAME backend_trix) + +if(CMAKE_BUILD_TYPE_LC STREQUAL "release") + add_custom_command(TARGET ${LIB_ONERT_BACKEND_TRIX} POST_BUILD + COMMAND ${CMAKE_STRIP} "--strip-unneeded" $<TARGET_FILE_NAME:${LIB_ONERT_BACKEND_TRIX}>) +endif() + +install(TARGETS ${LIB_ONERT_BACKEND_TRIX} DESTINATION lib) diff --git a/runtime/onert/backend/gpu_cl/open_cl/AccessType.h b/runtime/onert/backend/trix/Config.cc index 81efd666f..c23326423 100644 --- a/runtime/onert/backend/gpu_cl/open_cl/AccessType.h +++ b/runtime/onert/backend/trix/Config.cc @@ -1,12 +1,11 @@ /* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -15,25 +14,19 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_GPU_CL_OPENCL_ACCESS_TYPE_H__ -#define __ONERT_BACKEND_GPU_CL_OPENCL_ACCESS_TYPE_H__ +#include "Config.h" namespace onert { namespace backend { -namespace gpu_cl +namespace trix { -enum class AccessType -{ - UNKNOWN, - READ, - WRITE, - READ_WRITE, -}; -} // namespace gpu_cl +bool Config::initialize() { return true; } + +ir::Layout Config::supportLayout(const ir::Operation &, ir::Layout) { return ir::Layout::NHWC; } + +} // namespace trix } // namespace backend } // namespace onert - -#endif // __ONERT_BACKEND_GPU_CL_OPENCL_ACCESS_TYPE_H__ diff --git a/runtime/onert/backend/trix/Config.h b/runtime/onert/backend/trix/Config.h new file mode 100644 index 000000000..799047d6f --- /dev/null +++ b/runtime/onert/backend/trix/Config.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ONERT_BACKEND_TRIX_CONFIG_H__ +#define __ONERT_BACKEND_TRIX_CONFIG_H__ + +#include <backend/IConfig.h> +#include <memory> +#include <util/ITimer.h> + +namespace onert +{ +namespace backend +{ +namespace trix +{ + +class Config : public IConfig +{ +public: + std::string id() override { return "trix"; } + bool initialize() override; + ir::Layout supportLayout(const ir::Operation &node, ir::Layout frontend_layout) override; + bool supportPermutation() override { return true; } + bool supportDynamicTensor() override { return false; } + bool supportFP16() override { return false; } + + std::unique_ptr<util::ITimer> timer() override { return std::make_unique<util::CPUTimer>(); } +}; + +} // namespace trix +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_TRIX_CONFIG_H__ diff --git a/runtime/onert/backend/trix/DevContext.h b/runtime/onert/backend/trix/DevContext.h new file mode 100644 index 000000000..482932fd4 --- /dev/null +++ b/runtime/onert/backend/trix/DevContext.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
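
Config above pins the trix backend to a fixed contract: NHWC layout, permutation allowed, no dynamic tensors, no FP16. A hedged sketch of a core-side query (the checks shown are illustrative, not the runtime's actual selection logic):

    #include "Config.h"

    bool TrixLooksUsable()
    {
      onert::backend::trix::Config cfg;
      // id() is the key used to select the backend ("trix").
      return cfg.initialize() && cfg.id() == "trix" && !cfg.supportDynamicTensor();
    }
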
+ */ + +#ifndef __ONERT_BACKEND_TRIX_DEV_CONTEXT_H__ +#define __ONERT_BACKEND_TRIX_DEV_CONTEXT_H__ + +#include <libnpuhost.h> + +namespace onert +{ +namespace backend +{ +namespace trix +{ + +class DevContext +{ +public: + DevContext() + { + auto device_count = getnumNPUdeviceByType(NPUCOND_TRIV2_CONN_SOCIP); + if (device_count <= 0) + { + throw std::runtime_error("Unable to find TRIV2 NPU device"); + } + + // Use NPU 0 device + if (getNPUdeviceByType(&_dev_handle, NPUCOND_TRIV2_CONN_SOCIP, 0) < 0) + { + throw std::runtime_error("Failed to get TRIV2 NPU device handle"); + } + } + + ~DevContext() + { + if (_dev_handle != nullptr) + { + unregisterNPUmodel_all(_dev_handle); + putNPUdevice(_dev_handle); + } + } + + npudev_h getDev() { return _dev_handle; } + + template <typename T> void setDataInfo(tensors_data_info *info, std::vector<T *> &tensors) + { + info->num_info = static_cast<uint32_t>(tensors.size()); + + for (uint32_t idx = 0; idx < info->num_info; ++idx) + { + info->info[idx].layout = convertDataLayout(tensors[idx]->layout()); + info->info[idx].type = convertDataType(tensors[idx]->data_type()); + } + } + + template <typename T> void setBuffer(generic_buffers *buf, std::vector<T *> &tensors) + { + buf->num_buffers = static_cast<uint32_t>(tensors.size()); + + for (uint32_t idx = 0; idx < buf->num_buffers; ++idx) + { + buf->bufs[idx].addr = tensors[idx]->buffer(); + buf->bufs[idx].size = static_cast<uint64_t>(tensors[idx]->total_size()); + buf->bufs[idx].type = BUFFER_MAPPED; + } + } + +private: + data_layout convertDataLayout(const ir::Layout layout) + { + switch (layout) + { + case ir::Layout::NCHW: + return DATA_LAYOUT_NCHW; + case ir::Layout::NHWC: + return DATA_LAYOUT_NHWC; + default: + throw std::runtime_error("Unknown Layout"); + } + } + + data_type convertDataType(const ir::DataType type) + { + switch (type) + { + case ir::DataType::QUANT_UINT8_ASYMM: + return DATA_TYPE_QASYMM8; + case ir::DataType::QUANT_INT16_SYMM: + return DATA_TYPE_QSYMM16; + default: + throw std::runtime_error("Unsupported data type"); + } + } + +private: + // NPU device handle + // TODO Support multicore npu device + npudev_h _dev_handle; +}; + +} // namespace trix +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_TRIX_DEV_CONTEXT_H__ diff --git a/runtime/onert/backend/trix/KernelGenerator.cc b/runtime/onert/backend/trix/KernelGenerator.cc new file mode 100644 index 000000000..68e6840dd --- /dev/null +++ b/runtime/onert/backend/trix/KernelGenerator.cc @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "KernelGenerator.h" + +#include "ops/BulkLayer.h" + +#include <backend/Backend.h> +#include <backend/IConfig.h> +#include <memory> +#include <util/Utils.h> +#include <util/logging.h> +#include <exec/DynamicShapeInferer.h> + +#include <stdexcept> + +namespace onert +{ +namespace backend +{ +namespace trix +{ + +KernelGenerator::KernelGenerator(const ir::Graph &graph, + const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<basic::TensorRegistry> &tensor_reg, + const std::shared_ptr<DevContext> &dev_context) + : basic::KernelGeneratorBase{graph}, + _ctx(graph.operands()), _operations_ctx{graph.operations()}, _current_layout{graph.layout()}, + _tensor_builder(tensor_builder), _tensor_reg{tensor_reg}, _dev_context{dev_context} +{ + // DO NOTHING +} + +std::unique_ptr<exec::FunctionSequence> KernelGenerator::generate(ir::OperationIndex ind) +{ + auto ret = std::make_unique<exec::FunctionSequence>(); + ret->enableDynamicShapeInferer(false); + + const auto &op = _graph.operations().at(ind); + op.accept(*this); + ret->append(releaseFunction()); + return ret; +} + +void KernelGenerator::visit(const ir::operation::Bulk &node) +{ + using ir::operation::Bulk; + + std::vector<IPortableTensor *> output_tensors; + for (auto &ofm_idx : node.getOutputs()) + output_tensors.emplace_back(_tensor_reg->getPortableTensor(ofm_idx)); + + std::vector<const IPortableTensor *> input_tensors; + for (auto &ifm_idx : node.getInputs()) + input_tensors.emplace_back(_tensor_reg->getPortableTensor(ifm_idx)); + + // parameters + const auto binary_path = node.param().binary_path; + + auto fn = std::make_unique<ops::BulkLayer>(); + + fn->configure(input_tensors, output_tensors, binary_path, _dev_context); + + _return_fn = std::move(fn); +} + +} // namespace trix +} // namespace backend +} // namespace onert diff --git a/runtime/onert/backend/trix/KernelGenerator.h b/runtime/onert/backend/trix/KernelGenerator.h new file mode 100644 index 000000000..d87dc6952 --- /dev/null +++ b/runtime/onert/backend/trix/KernelGenerator.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
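generate() above follows a one-operation, one-FunctionSequence contract: the visitor turns the Bulk node into a configured kernel, and the returned sequence is what the executor later runs. A self-contained sketch of that contract — all types here are illustrative stand-ins, not the onert exec API:

#include <functional>
#include <iostream>
#include <memory>
#include <vector>

// A "kernel": the configured, ready-to-run unit a visitor produces per node.
struct Function
{
  std::function<void()> body;
  void run() { body(); }
};

// The per-operation container generate() returns to the executor.
struct FunctionSequence
{
  std::vector<std::unique_ptr<Function>> fns;
  void append(std::unique_ptr<Function> f) { fns.push_back(std::move(f)); }
  void run()
  {
    for (auto &f : fns)
      f->run();
  }
};

int main()
{
  FunctionSequence seq; // generate() would build one of these per operation
  seq.append(std::make_unique<Function>(Function{[] { std::cout << "bulk kernel\n"; }}));
  seq.run();
}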
+ */ + +#ifndef __ONERT_BACKEND_TRIX_KERNEL_GENERATOR_H__ +#define __ONERT_BACKEND_TRIX_KERNEL_GENERATOR_H__ + +#include "TensorBuilder.h" +#include "backend/basic/TensorRegistry.h" +#include "Tensor.h" +#include "DevContext.h" + +#include <backend/basic/KernelGeneratorBase.h> +#include <ir/Operands.h> +#include <ir/Operations.h> + +namespace onert +{ +namespace backend +{ +namespace trix +{ + +class KernelGenerator : public basic::KernelGeneratorBase +{ +public: + KernelGenerator(const ir::Graph &graph, const std::shared_ptr<TensorBuilder> &tensor_builder, + const std::shared_ptr<basic::TensorRegistry> &tensor_reg, + const std::shared_ptr<DevContext> &dev_context); + + std::unique_ptr<exec::FunctionSequence> generate(ir::OperationIndex op_ind) override; + +private: + void visit(const ir::operation::Bulk &node) override; + +private: + const ir::Operands &_ctx; + const ir::Operations &_operations_ctx; + ir::Layout _current_layout; + std::shared_ptr<TensorBuilder> _tensor_builder; + std::shared_ptr<basic::TensorRegistry> _tensor_reg; + const std::shared_ptr<DevContext> _dev_context; +}; + +} // namespace trix +} // namespace backend +} // namespace onert + +#endif // __ONERT_BACKEND_TRIX_KERNEL_GENERATOR_H__ diff --git a/runtime/onert/backend/acl_common/ParentInfo.h b/runtime/onert/backend/trix/Tensor.h index 708436327..5138cee71 100644 --- a/runtime/onert/backend/acl_common/ParentInfo.h +++ b/runtime/onert/backend/trix/Tensor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,31 +14,24 @@ * limitations under the License. */ -#ifndef __ONERT_BACKEND_ACL_COMMON_PARENT_INFO_H__ -#define __ONERT_BACKEND_ACL_COMMON_PARENT_INFO_H__ +#ifndef __ONERT_BACKEND_TRIX_TENSOR_H__ +#define __ONERT_BACKEND_TRIX_TENSOR_H__ -#include <ir/Index.h> -#include <ir/Coordinates.h> +#include <backend/basic/Tensor.h> +#include <ir/Data.h> namespace onert { namespace backend { -namespace acl_common +namespace trix { -/** - * @brief Struct to represent parent operand in child operand - */ -struct ParentInfo -{ - ir::OperandIndex parent; - ir::Layout frontend_layout; - ir::Coordinates coordinates; -}; +using Tensor = basic::Tensor; +using ExternalTensor = basic::ExternalTensor; -} // namespace acl_common +} // namespace trix } // namespace backend } // namespace onert -#endif // __ONERT_BACKEND_ACL_COMMON_PARENT_INFO_H__ +#endif // __ONERT_BACKEND_TRIX_TENSOR_H__ diff --git a/runtime/onert/backend/gpu_cl/open_cl/GpuObject.cc b/runtime/onert/backend/trix/TensorBuilder.h index 774f8151f..ac6ca0f9a 100644 --- a/runtime/onert/backend/gpu_cl/open_cl/GpuObject.cc +++ b/runtime/onert/backend/trix/TensorBuilder.h @@ -1,12 +1,11 @@ /* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2020 The TensorFlow Authors. All Rights Reserved. + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
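Note that KernelGenerator stores _dev_context as a shared_ptr and hands it to every BulkLayer it configures: the generated kernels can outlive the generator, so ownership of the device context has to be shared rather than borrowed. A self-contained sketch of that lifetime rule — the types below are illustrative:

#include <iostream>
#include <memory>

// Stand-in for the trix DevContext; prints when the device is finally released.
struct DevContext
{
  ~DevContext() { std::cout << "device released\n"; }
};

// Stand-in for a generated kernel that shares ownership of the context.
struct Kernel
{
  std::shared_ptr<DevContext> ctx;
};

int main()
{
  Kernel k;
  {
    auto ctx = std::make_shared<DevContext>(); // owned by the "generator"
    k.ctx = ctx;                               // the kernel shares ownership
  } // generator scope ends, but the context survives...
  std::cout << "kernel still usable\n";
} // ...until the last owner (the kernel) goes away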
* You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -15,30 +14,22 @@ * limitations under the License. */ -#include "GpuObject.h" +#ifndef __ONERT_BACKEND_TRIX_TENSOR_BUILDER_H__ +#define __ONERT_BACKEND_TRIX_TENSOR_BUILDER_H__ + +#include <backend/basic/TensorBuilder.h> namespace onert { namespace backend { -namespace gpu_cl +namespace trix { -std::string MemoryTypeToCLType(MemoryType type) -{ - switch (type) - { - case MemoryType::GLOBAL: - return "__global"; - case MemoryType::CONSTANT: - return "__constant"; - break; - case MemoryType::LOCAL: - return "__local"; - } - return ""; -} +using TensorBuilder = basic::TensorBuilder; -} // namespace gpu_cl +} // namespace trix } // namespace backend } // namespace onert + +#endif // __ONERT_BACKEND_TRIX_TENSOR_BUILDER_H__ diff --git a/runtime/onert/backend/trix/ops/BulkLayer.cc b/runtime/onert/backend/trix/ops/BulkLayer.cc new file mode 100644 index 000000000..71fdf3f0d --- /dev/null +++ b/runtime/onert/backend/trix/ops/BulkLayer.cc @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
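Tensor.h and TensorBuilder.h above show the cheapest way to stand up a new backend: alias the shared basic:: implementations instead of subclassing them, and only replace a type when the device actually needs different behaviour. Reduced to a sketch — the mock basic namespace below is illustrative:

#include <iostream>

// Mock of a shared implementation namespace like onert's basic::.
namespace basic
{
struct Tensor
{
  const char *where() const { return "host memory"; }
};
} // namespace basic

// The trix/Tensor.h pattern: zero-cost reuse via a type alias, no subclass.
namespace trix
{
using Tensor = basic::Tensor;
} // namespace trix

int main()
{
  trix::Tensor t;
  std::cout << t.where() << "\n"; // behaves exactly like basic::Tensor
}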
+ */
+
+#include "BulkLayer.h"
+#include <util/logging.h>
+
+#include <libnpuhost.h>
+
+#include <cstdlib>
+#include <stdexcept>
+#include <string>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+namespace ops
+{
+
+BulkLayer::BulkLayer() : _inputs(), _outputs(), _model_id(0), _meta(nullptr), _dev_context(nullptr)
+{
+  // DO NOTHING
+}
+
+BulkLayer::~BulkLayer() { free(_meta); }
+
+void BulkLayer::configure(const std::vector<const IPortableTensor *> &inputs,
+                          std::vector<IPortableTensor *> &outputs,
+                          const std::string &binary_path,
+                          const std::shared_ptr<DevContext> &dev_context)
+{
+  _inputs = inputs;
+  _outputs = outputs;
+  _dev_context = dev_context;
+
+  _meta = getNPUmodel_metadata(binary_path.c_str(), false);
+  if (_meta == nullptr)
+  {
+    throw std::runtime_error("Unable to extract the model metadata");
+  }
+
+  generic_buffer model_file;
+  model_file.type = BUFFER_FILE;
+  model_file.filepath = binary_path.c_str();
+  model_file.size = _meta->size;
+
+  if (registerNPUmodel(dev_context->getDev(), &model_file, &_model_id) < 0)
+  {
+    throw std::runtime_error("Failed to register npu model");
+  }
+}
+
+void BulkLayer::run()
+{
+  // Validate the I/O segment counts before creating a request so a mismatch
+  // does not leak a request id
+  if (_meta->input_seg_num != _inputs.size())
+  {
+    throw std::runtime_error("input size does not match to model input seg num");
+  }
+
+  if (_meta->output_seg_num != _outputs.size())
+  {
+    throw std::runtime_error("output size does not match to model output seg num");
+  }
+
+  int req_id;
+  if (createNPU_request(_dev_context->getDev(), _model_id, &req_id))
+  {
+    throw std::runtime_error("Unable to create NPU request with model id (" +
+                             std::to_string(_model_id) + ")");
+  }
+
+  tensors_data_info in_info;
+  tensors_data_info out_info;
+  _dev_context->setDataInfo<const IPortableTensor>(&in_info, _inputs);
+  _dev_context->setDataInfo<IPortableTensor>(&out_info, _outputs);
+
+  input_buffers input_buf;
+  output_buffers output_buf;
+  _dev_context->setBuffer<const IPortableTensor>(&input_buf, _inputs);
+  _dev_context->setBuffer<IPortableTensor>(&output_buf, _outputs);
+
+  if (setNPU_requestData(_dev_context->getDev(), req_id, &input_buf, &in_info, &output_buf,
+                         &out_info))
+  {
+    throw std::runtime_error("Unable to set NPU request data for model id (" +
+                             std::to_string(_model_id) + ")");
+  }
+
+  if (submitNPU_request(_dev_context->getDev(), req_id))
+  {
+    throw std::runtime_error("Unable to submit NPU request with req id (" +
+                             std::to_string(req_id) + ")");
+  }
+
+  if (removeNPU_request(_dev_context->getDev(), req_id))
+  {
+    throw std::runtime_error("Unable to remove NPU request with req id (" +
+                             std::to_string(req_id) + ")");
+  }
+}
+
+void BulkLayer::prepare()
+{
+  // DO NOTHING
+}
+
+} // namespace ops
+} // namespace trix
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/trix/ops/BulkLayer.h b/runtime/onert/backend/trix/ops/BulkLayer.h
new file mode 100644
index 000000000..f7080ccad
--- /dev/null
+++ b/runtime/onert/backend/trix/ops/BulkLayer.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
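run() above steps through create request -> set data -> submit -> remove, throwing on each failure; since the calls between create and remove can still throw, a scope guard is the usual way to guarantee the request id is always released. A self-contained sketch of that guard — create_request/remove_request are hypothetical stand-ins, not the real createNPU_request/removeNPU_request signatures:

#include <iostream>
#include <stdexcept>

// Stand-ins for the request create/remove pair.
static int g_next_req = 1;
int create_request(int /*model_id*/, int *req) { *req = g_next_req++; return 0; }
int remove_request(int req) { std::cout << "removed req " << req << "\n"; return 0; }

// Scope guard: the request is removed on every exit path, including throws.
class RequestGuard
{
public:
  explicit RequestGuard(int model_id)
  {
    if (create_request(model_id, &_req) != 0)
      throw std::runtime_error("unable to create NPU request");
  }
  ~RequestGuard() { remove_request(_req); } // runs on success and on throw
  int id() const { return _req; }

private:
  int _req;
};

int main()
{
  try
  {
    RequestGuard req{42};
    std::cout << "submitting req " << req.id() << "\n";
    throw std::runtime_error("simulated submit failure");
  }
  catch (const std::exception &e)
  {
    std::cout << "error: " << e.what() << "\n"; // the request was still removed
  }
}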
+ */
+
+#ifndef __ONERT_BACKEND_TRIX_OPS_BULKLAYER_H__
+#define __ONERT_BACKEND_TRIX_OPS_BULKLAYER_H__
+
+#include <backend/IPortableTensor.h>
+#include "../DevContext.h"
+
+#include <exec/IFunction.h>
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace onert
+{
+namespace backend
+{
+namespace trix
+{
+namespace ops
+{
+
+class BulkLayer : public ::onert::exec::IFunction
+{
+public:
+  BulkLayer();
+  ~BulkLayer();
+
+public:
+  void configure(const std::vector<const IPortableTensor *> &inputs,
+                 std::vector<IPortableTensor *> &outputs, const std::string &binary_path,
+                 const std::shared_ptr<DevContext> &dev_context);
+
+  void run() override;
+
+  void prepare() override;
+
+private:
+  std::vector<const IPortableTensor *> _inputs;
+  std::vector<IPortableTensor *> _outputs;
+
+  uint32_t _model_id;
+  npubin_meta *_meta;
+  std::shared_ptr<DevContext> _dev_context;
+};
+
+} // namespace ops
+} // namespace trix
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_TRIX_OPS_BULKLAYER_H__
diff --git a/runtime/onert/backend/trix/trix.cc b/runtime/onert/backend/trix/trix.cc
new file mode 100644
index 000000000..816fb4406
--- /dev/null
+++ b/runtime/onert/backend/trix/trix.cc
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
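BulkLayer above implements the exec::IFunction lifecycle: configure() binds tensors and registers the model once, prepare() is a pre-execution hook (a no-op here), and run() executes once per inference. A self-contained sketch of that contract — the interface below is illustrative, not the real exec::IFunction:

#include <iostream>

// Illustrative reduction of the IFunction contract.
struct IFunctionSketch
{
  virtual ~IFunctionSketch() = default;
  virtual void prepare() {} // optional pre-run setup
  virtual void run() = 0;   // called once per inference
};

struct EchoLayer : IFunctionSketch
{
  void run() override { std::cout << "inference step\n"; }
};

int main()
{
  EchoLayer layer; // configure(...) would bind tensors here
  layer.prepare(); // once, before the first run
  layer.run();     // per inference
  layer.run();
}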
+ */
+
+#include "Backend.h"
+
+extern "C" {
+
+onert::backend::Backend *onert_backend_create() { return new onert::backend::trix::Backend; }
+
+void onert_backend_destroy(onert::backend::Backend *backend) { delete backend; }
+}
diff --git a/runtime/onert/backend/xnnpack/ops/OperationUtils.h b/runtime/onert/backend/xnnpack/ops/OperationUtils.h
index 5102e32dd..fe93fccc0 100644
--- a/runtime/onert/backend/xnnpack/ops/OperationUtils.h
+++ b/runtime/onert/backend/xnnpack/ops/OperationUtils.h
@@ -17,10 +17,10 @@
 #ifndef __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__
 #define __ONERT_BACKEND_XNNPACK_OPS_OPERATION_UTILS_H__

-// duplicated from cpu/ops/OperationUtils.h
+#include <ir/DataType.h>
 #include <ir/InternalType.h>
 #include <ir/Padding.h>
-#include <ir/DataType.h>
+#include <util/CalculateActivationRange.h>

 namespace onert
 {
@@ -32,40 +32,7 @@ namespace ops
 {

 using OperandType = ir::DataType;
-
-template <typename T>
-void CalculateActivationRange(ir::Activation activation, T *activation_min, T *activation_max)
-{
-  if (activation == ir::Activation::RELU)
-  {
-    *activation_min = 0;
-    *activation_max = std::numeric_limits<T>::max();
-  }
-  else if (activation == ir::Activation::RELU6)
-  {
-    *activation_min = 0;
-    *activation_max = 6;
-  }
-  else if (activation == ir::Activation::RELU1)
-  {
-    *activation_min = -1;
-    *activation_max = 1;
-  }
-  else if (activation == ir::Activation::SIGMOID)
-  {
-    *activation_min = 0;
-    *activation_max = 1;
-  }
-  else if (activation == ir::Activation::NONE)
-  {
-    *activation_min = std::numeric_limits<T>::lowest();
-    *activation_max = std::numeric_limits<T>::max();
-  }
-  else
-  {
-    throw std::runtime_error{"Unsupported fused activation function"};
-  }
-}
+using namespace onert::util; // CalculateActivationRange

 } // namespace ops
 } // namespace xnnpack
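trix.cc above exists so the backend can be built as a plugin: the extern "C" linkage keeps onert_backend_create and onert_backend_destroy unmangled, which is what lets a loader find them with dlsym. A sketch of how such a loader could consume them — the library name is an assumption, and onert's real loader lives elsewhere in the runtime:

#include <dlfcn.h>
#include <iostream>

int main()
{
  // Hypothetical library name for the trix backend plugin.
  void *handle = dlopen("libbackend_trix.so", RTLD_LAZY | RTLD_LOCAL);
  if (handle == nullptr)
  {
    std::cerr << "dlopen failed: " << dlerror() << "\n";
    return 1;
  }

  // C linkage keeps the symbol names unmangled, so dlsym can find them.
  using create_fn = void *(*)();
  using destroy_fn = void (*)(void *);
  auto create = reinterpret_cast<create_fn>(dlsym(handle, "onert_backend_create"));
  auto destroy = reinterpret_cast<destroy_fn>(dlsym(handle, "onert_backend_destroy"));
  if (create != nullptr && destroy != nullptr)
  {
    void *backend = create(); // really an onert::backend::Backend *
    destroy(backend);
  }

  dlclose(handle);
  return 0;
}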