diff options
Diffstat (limited to 'runtime/onert/backend/gpu_cl/operand')
-rw-r--r-- | runtime/onert/backend/gpu_cl/operand/CLTensor.cc | 48 | ||||
-rw-r--r-- | runtime/onert/backend/gpu_cl/operand/CLTensor.h | 67 | ||||
-rw-r--r-- | runtime/onert/backend/gpu_cl/operand/ICLTensor.cc | 226 | ||||
-rw-r--r-- | runtime/onert/backend/gpu_cl/operand/ICLTensor.h | 140 |
4 files changed, 481 insertions, 0 deletions
diff --git a/runtime/onert/backend/gpu_cl/operand/CLTensor.cc b/runtime/onert/backend/gpu_cl/operand/CLTensor.cc
new file mode 100644
index 000000000..1b19b10f8
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/operand/CLTensor.cc
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CLTensor.h"
+
+#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
+
+using namespace tflite::gpu::cl;
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+namespace operand
+{
+
+// Concrete CL tensor. Rank/type/shape/descriptor bookkeeping lives in the
+// ICLTensor base; this class only owns the underlying tflite::gpu::cl::Tensor,
+// which is default-constructed here (actual device memory is allocated later
+// by the tensor manager, not in this constructor).
+CLTensor::CLTensor(size_t rank, TensorType type, tflite::gpu::BHWC shape,
+                   tflite::gpu::TensorDescriptor desc)
+  : ICLTensor{rank, type, shape, desc}, _tensor(std::make_shared<Tensor>())
+{
+}
+
+// Read-only access to the wrapped TFLite GPU tensor.
+const tflite::gpu::cl::Tensor *CLTensor::handle() const { return _tensor.get(); }
+
+// Mutable access to the wrapped TFLite GPU tensor.
+tflite::gpu::cl::Tensor *CLTensor::handle() { return _tensor.get(); }
+
+} // namespace operand
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/operand/CLTensor.h b/runtime/onert/backend/gpu_cl/operand/CLTensor.h
new file mode 100644
index 000000000..269551d0c
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/operand/CLTensor.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2021 
Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_OPERAND_CL_TENSOR_H__
+#define __ONERT_BACKEND_GPU_CL_OPERAND_CL_TENSOR_H__
+
+#include "ICLTensor.h"
+
+#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+namespace operand
+{
+
+// Concrete ICLTensor backed by a shared tflite::gpu::cl::Tensor
+// (see CLTensor.cc for construction semantics).
+class CLTensor : public ICLTensor
+{
+public:
+  CLTensor() = delete;
+
+public:
+  CLTensor(size_t rank, TensorType type, tflite::gpu::BHWC shape,
+           tflite::gpu::TensorDescriptor desc);
+
+public:
+  const tflite::gpu::cl::Tensor *handle() const override;
+  tflite::gpu::cl::Tensor *handle() override;
+
+public:
+  /** Set given buffer as the buffer of the tensor
+   *
+   * @note Ownership of the memory is not transferred to this object.
+   *       Thus management (allocate/free) should be done by the client.
+   *
+   * @param[in] host_ptr Storage to be used.
+   */
+  // NOTE(review): declared here but no definition appears in the accompanying
+  // CLTensor.cc — this will fail at link time if ever referenced. Confirm a
+  // definition exists elsewhere or that no caller uses it yet.
+  void setBuffer(void *host_ptr);
+
+private:
+  // Owning handle to the underlying TFLite GPU tensor object.
+  std::shared_ptr<tflite::gpu::cl::Tensor> _tensor;
+};
+
+} // namespace operand
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_GPU_CL_OPERAND_CL_TENSOR_H__
diff --git a/runtime/onert/backend/gpu_cl/operand/ICLTensor.cc b/runtime/onert/backend/gpu_cl/operand/ICLTensor.cc
new file mode 100644
index 000000000..1e61b9928
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/operand/ICLTensor.cc
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ICLTensor.h"
+
+#include "tensorflow/lite/delegates/gpu/api.h"
+#include "tensorflow/lite/delegates/gpu/spi.h"
+#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
+#include "tensorflow/lite/delegates/gpu/cl/kernels/converter.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+namespace operand
+{
+
+using namespace tflite::gpu;
+using namespace tflite::gpu::cl;
+using namespace tflite::gpu::internal_tensor;
+
+// Runs `fn` on this tensor; no-op when the tensor is zero-sized (nothing to
+// read or write).
+void ICLTensor::access(const std::function<void(ITensor &tensor)> &fn)
+{
+  if (total_size() == 0)
+    return;
+
+  fn(*this);
+}
+
+// Prepares the two-stage CPU -> device upload path used by enqueueWriteBuffer:
+//   CPU (BHWC float32) --_converter_to--> intermediate `_cl_memory`
+//   `_cl_memory`       --_converter_from--> this tensor's native storage/layout
+// Also allocates the intermediate device buffer/texture into `_cl_memory`.
+// Throws std::runtime_error if allocation or converter creation fails.
+void ICLTensor::writeConvertInit(tflite::gpu::TensorObjectConverterBuilder *converter_builder,
+                                 std::shared_ptr<tflite::gpu::cl::Environment> environment)
+{
+  _environment = environment;
+  // Source definition: user-provided CPU memory, BHWC, float32.
+  TensorObjectDef input_def;
+  input_def.dimensions.b = handle()->Batch();
+  input_def.dimensions.h = handle()->Height();
+  input_def.dimensions.w = handle()->Width();
+  input_def.dimensions.c = handle()->Channels();
+  input_def.object_def.data_layout = DataLayout::BHWC;
+  input_def.object_def.data_type = DataType::FLOAT32;
+  input_def.object_def.object_type = ObjectType::CPU_MEMORY;
+  input_def.object_def.user_provided = true;
+
+  // Intermediate: same dims/type as the source, but on the device, using the
+  // same object type (buffer vs texture) as this tensor's storage.
+  TensorObjectDef permute_def = input_def;
+  permute_def.object_def.object_type = ToObjectType(handle()->GetStorageType());
+
+  const auto &dims = permute_def.dimensions;
+  const BHWC shape(dims.b, dims.h, dims.w, dims.c);
+  const TensorDescriptor desc{
+    permute_def.object_def.data_type,
+    ToTensorStorageType(permute_def.object_def.object_type, permute_def.object_def.data_layout),
+    Layout::BHWC};
+  if (!AllocateTensorMemory(_environment->context(), shape, desc, &_cl_memory).ok())
+  {
+    throw std::runtime_error("Failed to AllocateTensorMemory");
+  }
+
+  // Destination: this tensor's own storage type / layout / data type.
+  TensorObjectDef output_def = permute_def;
+  output_def.object_def.data_layout = ToDataLayout(handle()->GetStorageType());
+  output_def.object_def.data_type = handle()->GetDataType();
+  // NOTE(review): this clears user_provided on `input_def` (the user-supplied
+  // CPU buffer) right before building the converters, while `output_def`
+  // inherits user_provided = true from permute_def. It looks like the intent
+  // was to clear the flag on `output_def` (the tensor's own, non-user storage)
+  // instead — confirm against the converter builder's requirements.
+  input_def.object_def.user_provided = false;
+
+  if (!converter_builder->MakeConverter(input_def, permute_def, &_converter_to).ok())
+  {
+    throw std::runtime_error("Failed to make converter_to");
+  }
+  if (!converter_builder->MakeConverter(permute_def, output_def, &_converter_from).ok())
+  {
+    throw std::runtime_error("Failed to make converter_from");
+  }
+}
+
+// Prepares the two-stage device -> CPU download path used by
+// enqueueReadBuffer (mirror of writeConvertInit):
+//   tensor storage --_converter_from--> intermediate `_cl_memory`
+//   `_cl_memory`   --_converter_to--> user-provided CPU memory (BHWC float32)
+// Note the converter naming is relative to the CPU side, so the roles of
+// _converter_from/_converter_to are swapped compared to the write path.
+// Throws std::runtime_error if allocation or converter creation fails.
+void ICLTensor::readConvertInit(tflite::gpu::TensorObjectConverterBuilder *converter_builder,
+                                std::shared_ptr<tflite::gpu::cl::Environment> environment)
+{
+  _environment = environment;
+  // Source definition: this tensor's own storage type / layout / data type.
+  TensorObjectDef input_def;
+  input_def.dimensions.b = handle()->Batch();
+  input_def.dimensions.h = handle()->Height();
+  input_def.dimensions.w = handle()->Width();
+  input_def.dimensions.c = handle()->Channels();
+  input_def.object_def.data_layout = ToDataLayout(handle()->GetStorageType());
+  input_def.object_def.data_type = handle()->GetDataType();
+  input_def.object_def.object_type = ToObjectType(handle()->GetStorageType());
+  input_def.object_def.user_provided = false;
+
+  // Intermediate/destination side: BHWC float32, user-provided CPU memory.
+  TensorObjectDef permute_def = input_def;
+  permute_def.object_def.data_layout = DataLayout::BHWC;
+  permute_def.object_def.data_type = DataType::FLOAT32;
+  permute_def.object_def.user_provided = true;
+
+  const auto &dims = permute_def.dimensions;
+  const BHWC shape(dims.b, dims.h, dims.w, dims.c);
+  const TensorDescriptor desc{
+    permute_def.object_def.data_type,
+    ToTensorStorageType(permute_def.object_def.object_type, permute_def.object_def.data_layout),
+    Layout::BHWC};
+  if (!AllocateTensorMemory(_environment->context(), shape, desc, &_cl_memory).ok())
+  {
+    throw std::runtime_error("Failed to AllocateTensorMemory");
+  }
+
+  TensorObjectDef output_def = permute_def;
+  output_def.object_def.object_type = ObjectType::CPU_MEMORY;
+
+  if (!converter_builder->MakeConverter(input_def, permute_def, &_converter_from).ok())
+  {
+    throw std::runtime_error("Failed to make converter_from");
+  }
+  if (!converter_builder->MakeConverter(permute_def, output_def, &_converter_to).ok())
+  {
+    throw std::runtime_error("Failed to make converter_to");
+  }
+}
+
+// Uploads `ptr` (BHWC float32 CPU memory, _info._shape.DimensionsProduct()
+// elements) into this tensor via the converters built in writeConvertInit.
+// Throws std::runtime_error on any conversion or queue failure.
+void ICLTensor::enqueueWriteBuffer(const void *ptr, bool blocking)
+{
+  TensorObject input_obj = MakeReadableCpuMemory(
+    absl::MakeSpan(static_cast<const float *>(ptr), _info._shape.DimensionsProduct()));
+
+  TensorObject output_obj;
+
+  // Wrap the intermediate `_cl_memory` as texture or buffer to match the
+  // tensor's storage class.
+  TensorObject permute_obj;
+  if (ToObjectType(handle()->GetStorageType()) == ObjectType::OPENCL_TEXTURE)
+  {
+    permute_obj = OpenClTexture{_cl_memory.memory()};
+  }
+  else
+  {
+    permute_obj = OpenClBuffer{_cl_memory.memory()};
+  }
+
+  // Wrap the tensor's own memory as the final destination object.
+  if (handle()->GetStorageType() == TensorStorageType::BUFFER)
+  {
+    output_obj = OpenClBuffer{handle()->GetMemoryPtr()};
+  }
+  else if (handle()->GetStorageType() == TensorStorageType::IMAGE_BUFFER)
+  {
+    output_obj = OpenClBuffer{handle()->GetMemoryPtrForWriting()};
+  }
+  else
+  {
+    output_obj = OpenClTexture{handle()->GetMemoryPtr()};
+  }
+
+  // Stage 1: CPU -> intermediate.
+  if (!_converter_to->Convert(input_obj, permute_obj).ok())
+  {
+    throw std::runtime_error("Failed to write cl buffer from cpu memory");
+  }
+
+  // NOTE(review): the blocking wait sits between the two conversions, so the
+  // final layout conversion below is not covered by it (enqueueReadBuffer
+  // waits after both). On an in-order queue this is safe for reuse of `ptr`
+  // once stage 1 completes, but confirm the asymmetry is intentional.
+  if (blocking && !_environment->queue()->WaitForCompletion().ok())
+  {
+    throw std::runtime_error("Failed to WaitForCompletion");
+  }
+
+  // Stage 2: intermediate -> tensor's native layout.
+  if (!_converter_from->Convert(permute_obj, output_obj).ok())
+  {
+    throw std::runtime_error("Failed to change layout");
+  }
+}
+
+// Downloads this tensor into `ptr` (BHWC float32 CPU memory,
+// _info._shape.DimensionsProduct() elements) via the converters built in
+// readConvertInit. Throws std::runtime_error on any conversion/queue failure.
+void ICLTensor::enqueueReadBuffer(void *ptr, bool blocking)
+{
+  // Wrap the tensor's own memory as the source object.
+  TensorObject input_obj;
+
+  if (handle()->GetStorageType() == TensorStorageType::BUFFER)
+  {
+    input_obj = OpenClBuffer{handle()->GetMemoryPtr()};
+  }
+  else if (handle()->GetStorageType() == TensorStorageType::IMAGE_BUFFER)
+  {
+    input_obj = OpenClBuffer{handle()->GetMemoryPtrForWriting()};
+  }
+  else
+  {
+    input_obj = OpenClTexture{handle()->GetMemoryPtr()};
+  }
+
+  // Wrap the intermediate `_cl_memory` to match the tensor's storage class.
+  TensorObject permute_obj;
+  if (ToObjectType(handle()->GetStorageType()) == ObjectType::OPENCL_TEXTURE)
+  {
+    permute_obj = OpenClTexture{_cl_memory.memory()};
+  }
+  else
+  {
+    permute_obj = OpenClBuffer{_cl_memory.memory()};
+  }
+
+  TensorObject output_obj =
+    MakeCpuMemory(absl::MakeSpan(static_cast<float *>(ptr), _info._shape.DimensionsProduct()));
+
+  // Stage 1: tensor's native layout -> intermediate.
+  if (!_converter_from->Convert(input_obj, permute_obj).ok())
+  {
+    throw std::runtime_error("Failed to change layout");
+  }
+  // Stage 2: intermediate -> CPU.
+  if (!_converter_to->Convert(permute_obj, output_obj).ok())
+  {
+    throw std::runtime_error("Failed to read cl buffer");
+  }
+
+  if (blocking && !_environment->queue()->WaitForCompletion().ok())
+  {
+    throw std::runtime_error("Failed to WaitForCompletion");
+  }
+}
+
+} // namespace operand
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/operand/ICLTensor.h b/runtime/onert/backend/gpu_cl/operand/ICLTensor.h
new file mode 100644
index 000000000..47420a1c2
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/operand/ICLTensor.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_OPERAND_I_CL_TENSOR_H__
+#define __ONERT_BACKEND_GPU_CL_OPERAND_I_CL_TENSOR_H__
+
+#include <backend/ITensor.h>
+
+#include "tensorflow/lite/delegates/gpu/api.h"
+#include "tensorflow/lite/delegates/gpu/spi.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
+#include "tensorflow/lite/delegates/gpu/cl/kernels/converter.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
+#include "tensorflow/lite/delegates/gpu/cl/environment.h"
+
+#include "Utils.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+namespace operand
+{
+
+// Shape (BHWC) + TFLite GPU tensor descriptor pair kept by every CL tensor.
+struct TensorInfo
+{
+  tflite::gpu::BHWC _shape;
+  tflite::gpu::TensorDescriptor _desc;
+};
+
+// Abstract base for OpenCL-backed tensors in the gpu_cl backend. Implements
+// the onert ITensor interface on top of a tflite::gpu::cl::Tensor exposed by
+// the derived class through handle(). Host<->device transfers go through the
+// converter pipelines set up by writeConvertInit/readConvertInit (see
+// ICLTensor.cc). Only FLOAT32, non-dynamic, NHWC tensors are supported.
+class ICLTensor : public ITensor
+{
+public:
+  // NOTE(review): the defaulted constructor leaves _rank and _type
+  // uninitialized — confirm it is never used to create a live tensor.
+  ICLTensor() = default;
+  ICLTensor(const ICLTensor &) = delete;
+  ICLTensor &operator=(const ICLTensor &) = delete;
+  ICLTensor(ICLTensor &&) = default;
+  ICLTensor &operator=(ICLTensor &&) = default;
+
+  ICLTensor(size_t rank, TensorType type, tflite::gpu::BHWC shape,
+            tflite::gpu::TensorDescriptor desc)
+    : _rank{rank}, _type(type), _info{shape, desc}
+  {
+  }
+
+public:
+  // Raw device memory pointer of the underlying CL tensor, viewed as bytes.
+  uint8_t *buffer() const final { return reinterpret_cast<uint8_t *>(handle()->GetMemoryPtr()); }
+  // Size in bytes; hard-codes float elements, matching data_type() below.
+  size_t total_size() const final { return _info._shape.DimensionsProduct() * sizeof(float); }
+  // Element-offset addressing is meaningless for opaque CL storage.
+  size_t calcOffset(const ir::Coordinates &) const final
+  {
+    throw std::runtime_error("ICLTensor::calcOffset() is not supported.");
+  }
+  ir::Layout layout() const final { return ir::Layout::NHWC; }
+  ir::DataType data_type() const final { return ir::DataType::FLOAT32; }
+  // Quantization accessors are unsupported: this backend is float-only.
+  float data_scale() const override
+  {
+    throw std::runtime_error("ICLTensor::data_scale() is not supported.");
+  }
+  int32_t data_zero_point() const override
+  {
+    throw std::runtime_error("ICLTensor::data_zero_point() is not supported.");
+  }
+  const std::vector<float> &data_scales() const override
+  {
+    throw std::runtime_error("ICLTensor::data_scales() is not supported.");
+  }
+  const std::vector<int32_t> &data_zero_points() const override
+  {
+    throw std::runtime_error("ICLTensor::data_zero_points() is not supported.");
+  }
+  bool is_dynamic() const override { return false; }
+  // Maps the internal BHWC shape back to an ir::Shape of the original rank.
+  // NOTE(review): the rank-2/3 mappings ({b,c} and {b,w,c}) encode how lower
+  // ranks were extended into BHWC — confirm against the tensor builder.
+  ir::Shape getShape() const override
+  {
+    tflite::gpu::BHWC shape = _info._shape;
+    switch (_rank)
+    {
+      case 1:
+        return ir::Shape{shape.b};
+      case 2:
+        return ir::Shape{shape.b, shape.c};
+      case 3:
+        return ir::Shape{shape.b, shape.w, shape.c};
+      case 4:
+        return ir::Shape{shape.b, shape.h, shape.w, shape.c};
+      default:
+        break;
+    }
+    // Unsupported rank: returns an empty shape rather than throwing.
+    return ir::Shape{};
+  }
+  bool has_padding() const override { return false; }
+  void access(const std::function<void(ITensor &tensor)> &fn) final;
+  // CL memory is not host-mappable here; callers must use the enqueue*Buffer
+  // transfer path instead of direct buffer() access.
+  bool needMemoryMap() const final { return true; }
+  void enqueueWriteBuffer(const void *ptr, bool blocking = true) final;
+  void enqueueReadBuffer(void *ptr, bool blocking = true) final;
+
+  // Build the host->device / device->host converter pipelines (ICLTensor.cc).
+  void writeConvertInit(tflite::gpu::TensorObjectConverterBuilder *converter_builder,
+                        std::shared_ptr<tflite::gpu::cl::Environment> environment);
+  void readConvertInit(tflite::gpu::TensorObjectConverterBuilder *converter_builder,
+                       std::shared_ptr<tflite::gpu::cl::Environment> environment);
+
+  TensorType get_type() { return _type; }
+  // Returns the newly assigned type (assignment expression value).
+  TensorType set_type(TensorType type) { return _type = type; }
+  // NOTE(review): returns a const copy — the `const` on a by-value return has
+  // no effect; callers pay for a TensorInfo copy on every call.
+  const TensorInfo get_info() { return _info; }
+
+public:
+  virtual const tflite::gpu::cl::Tensor *handle() const = 0;
+  virtual tflite::gpu::cl::Tensor *handle() = 0;
+
+private:
+protected:
+  size_t _rank; // Actual rank (reflects extended rank)
+  TensorType _type;
+  TensorInfo _info;
+  // Intermediate device memory used by the converter pipelines.
+  tflite::gpu::cl::CLMemory _cl_memory;
+  std::shared_ptr<tflite::gpu::cl::Environment> _environment;
+  // Converters built by write/readConvertInit; roles are relative to the CPU
+  // side (to = towards device intermediate on write, towards CPU on read).
+  std::unique_ptr<tflite::gpu::TensorObjectConverter> _converter_to;
+  std::unique_ptr<tflite::gpu::TensorObjectConverter> _converter_from;
+};
+
+} // namespace operand
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_GPU_CL_OPERAND_I_CL_TENSOR_H__
|