diff options
Diffstat (limited to 'runtime/onert/backend/gpu_cl/operand')
-rw-r--r-- | runtime/onert/backend/gpu_cl/operand/CLTensor.cc | 48 | ||||
-rw-r--r-- | runtime/onert/backend/gpu_cl/operand/CLTensor.h | 67 | ||||
-rw-r--r-- | runtime/onert/backend/gpu_cl/operand/ICLTensor.cc | 226 | ||||
-rw-r--r-- | runtime/onert/backend/gpu_cl/operand/ICLTensor.h | 140 |
4 files changed, 481 insertions, 0 deletions
diff --git a/runtime/onert/backend/gpu_cl/operand/CLTensor.cc b/runtime/onert/backend/gpu_cl/operand/CLTensor.cc
new file mode 100644
index 000000000..1b19b10f8
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/operand/CLTensor.cc
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "CLTensor.h"
+
+#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
+
+using namespace tflite::gpu::cl;
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+namespace operand
+{
+
+// Concrete CL tensor. Rank/type/shape/descriptor bookkeeping lives in the
+// ICLTensor base; this class only owns the underlying tflite::gpu::cl::Tensor,
+// which is default-constructed here (actual device memory is allocated later
+// by the tensor manager, not in this constructor).
+CLTensor::CLTensor(size_t rank, TensorType type, tflite::gpu::BHWC shape,
+                   tflite::gpu::TensorDescriptor desc)
+  : ICLTensor{rank, type, shape, desc}, _tensor(std::make_shared<Tensor>())
+{
+}
+
+// Read-only access to the wrapped TFLite GPU tensor.
+const tflite::gpu::cl::Tensor *CLTensor::handle() const { return _tensor.get(); }
+
+// Mutable access to the wrapped TFLite GPU tensor.
+tflite::gpu::cl::Tensor *CLTensor::handle() { return _tensor.get(); }
+
+} // namespace operand
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/operand/CLTensor.h b/runtime/onert/backend/gpu_cl/operand/CLTensor.h
new file mode 100644
index 000000000..269551d0c
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/operand/CLTensor.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2021 
Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_OPERAND_CL_TENSOR_H__
+#define __ONERT_BACKEND_GPU_CL_OPERAND_CL_TENSOR_H__
+
+#include "ICLTensor.h"
+
+#include "tensorflow/lite/delegates/gpu/cl/buffer.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+namespace operand
+{
+
+// Concrete ICLTensor backed by a shared tflite::gpu::cl::Tensor
+// (see CLTensor.cc for construction semantics).
+class CLTensor : public ICLTensor
+{
+public:
+  CLTensor() = delete;
+
+public:
+  CLTensor(size_t rank, TensorType type, tflite::gpu::BHWC shape,
+           tflite::gpu::TensorDescriptor desc);
+
+public:
+  const tflite::gpu::cl::Tensor *handle() const override;
+  tflite::gpu::cl::Tensor *handle() override;
+
+public:
+  /** Set given buffer as the buffer of the tensor
+   *
+   * @note Ownership of the memory is not transferred to this object.
+   *       Thus management (allocate/free) should be done by the client.
+   *
+   * @param[in] host_ptr Storage to be used.
+   */
+  // NOTE(review): declared here but no definition appears in the accompanying
+  // CLTensor.cc — this will fail at link time if ever referenced. Confirm a
+  // definition exists elsewhere or that no caller uses it yet.
+  void setBuffer(void *host_ptr);
+
+private:
+  // Owning handle to the underlying TFLite GPU tensor object.
+  std::shared_ptr<tflite::gpu::cl::Tensor> _tensor;
+};
+
+} // namespace operand
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_GPU_CL_OPERAND_CL_TENSOR_H__
diff --git a/runtime/onert/backend/gpu_cl/operand/ICLTensor.cc b/runtime/onert/backend/gpu_cl/operand/ICLTensor.cc
new file mode 100644
index 000000000..1e61b9928
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/operand/ICLTensor.cc
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ICLTensor.h"
+
+#include "tensorflow/lite/delegates/gpu/api.h"
+#include "tensorflow/lite/delegates/gpu/spi.h"
+#include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor_type_util.h"
+#include "tensorflow/lite/delegates/gpu/cl/kernels/converter.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+namespace operand
+{
+
+using namespace tflite::gpu;
+using namespace tflite::gpu::cl;
+using namespace tflite::gpu::internal_tensor;
+
+// Runs `fn` on this tensor; no-op when the tensor is zero-sized (nothing to
+// read or write).
+void ICLTensor::access(const std::function<void(ITensor &tensor)> &fn)
+{
+  if (total_size() == 0)
+    return;
+
+  fn(*this);
+}
+
+// Prepares the two-stage CPU -> device upload path used by enqueueWriteBuffer:
+//   CPU (BHWC float32) --_converter_to--> intermediate `_cl_memory`
+//   `_cl_memory`       --_converter_from--> this tensor's native storage/layout
+// Also allocates the intermediate device buffer/texture into `_cl_memory`.
+// Throws std::runtime_error if allocation or converter creation fails.
+void ICLTensor::writeConvertInit(tflite::gpu::TensorObjectConverterBuilder *converter_builder,
+                                 std::shared_ptr<tflite::gpu::cl::Environment> environment)
+{
+  _environment = environment;
+  // Source definition: user-provided CPU memory, BHWC, float32.
+  TensorObjectDef input_def;
+  input_def.dimensions.b = handle()->Batch();
+  input_def.dimensions.h = handle()->Height();
+  input_def.dimensions.w = handle()->Width();
+  input_def.dimensions.c = handle()->Channels();
+  input_def.object_def.data_layout = DataLayout::BHWC;
+  input_def.object_def.data_type = DataType::FLOAT32;
+  input_def.object_def.object_type = ObjectType::CPU_MEMORY;
+  input_def.object_def.user_provided = true;
+
+  // Intermediate: same dims/type as the source, but on the device, using the
+  // same object type (buffer vs texture) as this tensor's storage.
+  TensorObjectDef permute_def = input_def;
+  permute_def.object_def.object_type = ToObjectType(handle()->GetStorageType());
+
+  const auto &dims = permute_def.dimensions;
+  const BHWC shape(dims.b, dims.h, dims.w, dims.c);
+  const TensorDescriptor desc{
+    permute_def.object_def.data_type,
+    ToTensorStorageType(permute_def.object_def.object_type, permute_def.object_def.data_layout),
+    Layout::BHWC};
+  if (!AllocateTensorMemory(_environment->context(), shape, desc, &_cl_memory).ok())
+  {
+    throw std::runtime_error("Failed to AllocateTensorMemory");
+  }
+
+  // Destination: this tensor's own storage type / layout / data type.
+  TensorObjectDef output_def = permute_def;
+  output_def.object_def.data_layout = ToDataLayout(handle()->GetStorageType());
+  output_def.object_def.data_type = handle()->GetDataType();
+  // NOTE(review): this clears user_provided on `input_def` (the user-supplied
+  // CPU buffer) right before building the converters, while `output_def`
+  // inherits user_provided = true from permute_def. It looks like the intent
+  // was to clear the flag on `output_def` (the tensor's own, non-user storage)
+  // instead — confirm against the converter builder's requirements.
+  input_def.object_def.user_provided = false;
+
+  if (!converter_builder->MakeConverter(input_def, permute_def, &_converter_to).ok())
+  {
+    throw std::runtime_error("Failed to make converter_to");
+  }
+  if (!converter_builder->MakeConverter(permute_def, output_def, &_converter_from).ok())
+  {
+    throw std::runtime_error("Failed to make converter_from");
+  }
+}
+
+// Prepares the two-stage device -> CPU download path used by
+// enqueueReadBuffer (mirror of writeConvertInit):
+//   tensor storage --_converter_from--> intermediate `_cl_memory`
+//   `_cl_memory`   --_converter_to--> user-provided CPU memory (BHWC float32)
+// Note the converter naming is relative to the CPU side, so the roles of
+// _converter_from/_converter_to are swapped compared to the write path.
+// Throws std::runtime_error if allocation or converter creation fails.
+void ICLTensor::readConvertInit(tflite::gpu::TensorObjectConverterBuilder *converter_builder,
+                                std::shared_ptr<tflite::gpu::cl::Environment> environment)
+{
+  _environment = environment;
+  // Source definition: this tensor's own storage type / layout / data type.
+  TensorObjectDef input_def;
+  input_def.dimensions.b = handle()->Batch();
+  input_def.dimensions.h = handle()->Height();
+  input_def.dimensions.w = handle()->Width();
+  input_def.dimensions.c = handle()->Channels();
+  input_def.object_def.data_layout = ToDataLayout(handle()->GetStorageType());
+  input_def.object_def.data_type = handle()->GetDataType();
+  input_def.object_def.object_type = ToObjectType(handle()->GetStorageType());
+  input_def.object_def.user_provided = false;
+
+  // Intermediate/destination side: BHWC float32, user-provided CPU memory.
+  TensorObjectDef permute_def = input_def;
+  permute_def.object_def.data_layout = DataLayout::BHWC;
+  permute_def.object_def.data_type = DataType::FLOAT32;
+  permute_def.object_def.user_provided = true;
+
+  const auto &dims = permute_def.dimensions;
+  const BHWC shape(dims.b, dims.h, dims.w, dims.c);
+  const TensorDescriptor desc{
+    permute_def.object_def.data_type,
+    ToTensorStorageType(permute_def.object_def.object_type, permute_def.object_def.data_layout),
+    Layout::BHWC};
+  if (!AllocateTensorMemory(_environment->context(), shape, desc, &_cl_memory).ok())
+  {
+    throw std::runtime_error("Failed to AllocateTensorMemory");
+  }
+
+  TensorObjectDef output_def = permute_def;
+  output_def.object_def.object_type = ObjectType::CPU_MEMORY;
+
+  if (!converter_builder->MakeConverter(input_def, permute_def, &_converter_from).ok())
+  {
+    throw std::runtime_error("Failed to make converter_from");
+  }
+  if (!converter_builder->MakeConverter(permute_def, output_def, &_converter_to).ok())
+  {
+    throw std::runtime_error("Failed to make converter_to");
+  }
+}
+
+// Uploads `ptr` (BHWC float32 CPU memory, _info._shape.DimensionsProduct()
+// elements) into this tensor via the converters built in writeConvertInit.
+// Throws std::runtime_error on any conversion or queue failure.
+void ICLTensor::enqueueWriteBuffer(const void *ptr, bool blocking)
+{
+  TensorObject input_obj = MakeReadableCpuMemory(
+    absl::MakeSpan(static_cast<const float *>(ptr), _info._shape.DimensionsProduct()));
+
+  TensorObject output_obj;
+
+  // Wrap the intermediate `_cl_memory` as texture or buffer to match the
+  // tensor's storage class.
+  TensorObject permute_obj;
+  if (ToObjectType(handle()->GetStorageType()) == ObjectType::OPENCL_TEXTURE)
+  {
+    permute_obj = OpenClTexture{_cl_memory.memory()};
+  }
+  else
+  {
+    permute_obj = OpenClBuffer{_cl_memory.memory()};
+  }
+
+  // Wrap the tensor's own memory as the final destination object.
+  if (handle()->GetStorageType() == TensorStorageType::BUFFER)
+  {
+    output_obj = OpenClBuffer{handle()->GetMemoryPtr()};
+  }
+  else if (handle()->GetStorageType() == TensorStorageType::IMAGE_BUFFER)
+  {
+    output_obj = OpenClBuffer{handle()->GetMemoryPtrForWriting()};
+  }
+  else
+  {
+    output_obj = OpenClTexture{handle()->GetMemoryPtr()};
+  }
+
+  // Stage 1: CPU -> intermediate.
+  if (!_converter_to->Convert(input_obj, permute_obj).ok())
+  {
+    throw std::runtime_error("Failed to write cl buffer from cpu memory");
+  }
+
+  // NOTE(review): the blocking wait sits between the two conversions, so the
+  // final layout conversion below is not covered by it (enqueueReadBuffer
+  // waits after both). On an in-order queue this is safe for reuse of `ptr`
+  // once stage 1 completes, but confirm the asymmetry is intentional.
+  if (blocking && !_environment->queue()->WaitForCompletion().ok())
+  {
+    throw std::runtime_error("Failed to WaitForCompletion");
+  }
+
+  // Stage 2: intermediate -> tensor's native layout.
+  if (!_converter_from->Convert(permute_obj, output_obj).ok())
+  {
+    throw std::runtime_error("Failed to change layout");
+  }
+}
+
+// Downloads this tensor into `ptr` (BHWC float32 CPU memory,
+// _info._shape.DimensionsProduct() elements) via the converters built in
+// readConvertInit. Throws std::runtime_error on any conversion/queue failure.
+void ICLTensor::enqueueReadBuffer(void *ptr, bool blocking)
+{
+  // Wrap the tensor's own memory as the source object.
+  TensorObject input_obj;
+
+  if (handle()->GetStorageType() == TensorStorageType::BUFFER)
+  {
+    input_obj = OpenClBuffer{handle()->GetMemoryPtr()};
+  }
+  else if (handle()->GetStorageType() == TensorStorageType::IMAGE_BUFFER)
+  {
+    input_obj = OpenClBuffer{handle()->GetMemoryPtrForWriting()};
+  }
+  else
+  {
+    input_obj = OpenClTexture{handle()->GetMemoryPtr()};
+  }
+
+  // Wrap the intermediate `_cl_memory` to match the tensor's storage class.
+  TensorObject permute_obj;
+  if (ToObjectType(handle()->GetStorageType()) == ObjectType::OPENCL_TEXTURE)
+  {
+    permute_obj = OpenClTexture{_cl_memory.memory()};
+  }
+  else
+  {
+    permute_obj = OpenClBuffer{_cl_memory.memory()};
+  }
+
+  TensorObject output_obj =
+    MakeCpuMemory(absl::MakeSpan(static_cast<float *>(ptr), _info._shape.DimensionsProduct()));
+
+  // Stage 1: tensor's native layout -> intermediate.
+  if (!_converter_from->Convert(input_obj, permute_obj).ok())
+  {
+    throw std::runtime_error("Failed to change layout");
+  }
+  // Stage 2: intermediate -> CPU.
+  if (!_converter_to->Convert(permute_obj, output_obj).ok())
+  {
+    throw std::runtime_error("Failed to read cl buffer");
+  }
+
+  if (blocking && !_environment->queue()->WaitForCompletion().ok())
+  {
+    throw std::runtime_error("Failed to WaitForCompletion");
+  }
+}
+
+} // namespace operand
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
diff --git a/runtime/onert/backend/gpu_cl/operand/ICLTensor.h b/runtime/onert/backend/gpu_cl/operand/ICLTensor.h
new file mode 100644
index 000000000..47420a1c2
--- /dev/null
+++ b/runtime/onert/backend/gpu_cl/operand/ICLTensor.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __ONERT_BACKEND_GPU_CL_OPERAND_I_CL_TENSOR_H__
+#define __ONERT_BACKEND_GPU_CL_OPERAND_I_CL_TENSOR_H__
+
+#include <backend/ITensor.h>
+
+#include "tensorflow/lite/delegates/gpu/api.h"
+#include "tensorflow/lite/delegates/gpu/spi.h"
+#include "tensorflow/lite/delegates/gpu/cl/cl_command_queue.h"
+#include "tensorflow/lite/delegates/gpu/cl/kernels/converter.h"
+#include "tensorflow/lite/delegates/gpu/cl/tensor.h"
+#include "tensorflow/lite/delegates/gpu/cl/environment.h"
+
+#include "Utils.h"
+
+namespace onert
+{
+namespace backend
+{
+namespace gpu_cl
+{
+namespace operand
+{
+
+// Shape (BHWC) + TFLite GPU tensor descriptor pair kept by every CL tensor.
+struct TensorInfo
+{
+  tflite::gpu::BHWC _shape;
+  tflite::gpu::TensorDescriptor _desc;
+};
+
+// Abstract base for OpenCL-backed tensors in the gpu_cl backend. Implements
+// the onert ITensor interface on top of a tflite::gpu::cl::Tensor exposed by
+// the derived class through handle(). Host<->device transfers go through the
+// converter pipelines set up by writeConvertInit/readConvertInit (see
+// ICLTensor.cc). Only FLOAT32, non-dynamic, NHWC tensors are supported.
+class ICLTensor : public ITensor
+{
+public:
+  // NOTE(review): the defaulted constructor leaves _rank and _type
+  // uninitialized — confirm it is never used to create a live tensor.
+  ICLTensor() = default;
+  ICLTensor(const ICLTensor &) = delete;
+  ICLTensor &operator=(const ICLTensor &) = delete;
+  ICLTensor(ICLTensor &&) = default;
+  ICLTensor &operator=(ICLTensor &&) = default;
+
+  ICLTensor(size_t rank, TensorType type, tflite::gpu::BHWC shape,
+            tflite::gpu::TensorDescriptor desc)
+    : _rank{rank}, _type(type), _info{shape, desc}
+  {
+  }
+
+public:
+  // Raw device memory pointer of the underlying CL tensor, viewed as bytes.
+  uint8_t *buffer() const final { return reinterpret_cast<uint8_t *>(handle()->GetMemoryPtr()); }
+  // Size in bytes; hard-codes float elements, matching data_type() below.
+  size_t total_size() const final { return _info._shape.DimensionsProduct() * sizeof(float); }
+  // Element-offset addressing is meaningless for opaque CL storage.
+  size_t calcOffset(const ir::Coordinates &) const final
+  {
+    throw std::runtime_error("ICLTensor::calcOffset() is not supported.");
+  }
+  ir::Layout layout() const final { return ir::Layout::NHWC; }
+  ir::DataType data_type() const final { return ir::DataType::FLOAT32; }
+  // Quantization accessors are unsupported: this backend is float-only.
+  float data_scale() const override
+  {
+    throw std::runtime_error("ICLTensor::data_scale() is not supported.");
+  }
+  int32_t data_zero_point() const override
+  {
+    throw std::runtime_error("ICLTensor::data_zero_point() is not supported.");
+  }
+  const std::vector<float> &data_scales() const override
+  {
+    throw std::runtime_error("ICLTensor::data_scales() is not supported.");
+  }
+  const std::vector<int32_t> &data_zero_points() const override
+  {
+    throw std::runtime_error("ICLTensor::data_zero_points() is not supported.");
+  }
+  bool is_dynamic() const override { return false; }
+  // Maps the internal BHWC shape back to an ir::Shape of the original rank.
+  // NOTE(review): the rank-2/3 mappings ({b,c} and {b,w,c}) encode how lower
+  // ranks were extended into BHWC — confirm against the tensor builder.
+  ir::Shape getShape() const override
+  {
+    tflite::gpu::BHWC shape = _info._shape;
+    switch (_rank)
+    {
+      case 1:
+        return ir::Shape{shape.b};
+      case 2:
+        return ir::Shape{shape.b, shape.c};
+      case 3:
+        return ir::Shape{shape.b, shape.w, shape.c};
+      case 4:
+        return ir::Shape{shape.b, shape.h, shape.w, shape.c};
+      default:
+        break;
+    }
+    // Unsupported rank: returns an empty shape rather than throwing.
+    return ir::Shape{};
+  }
+  bool has_padding() const override { return false; }
+  void access(const std::function<void(ITensor &tensor)> &fn) final;
+  // CL memory is not host-mappable here; callers must use the enqueue*Buffer
+  // transfer path instead of direct buffer() access.
+  bool needMemoryMap() const final { return true; }
+  void enqueueWriteBuffer(const void *ptr, bool blocking = true) final;
+  void enqueueReadBuffer(void *ptr, bool blocking = true) final;
+
+  // Build the host->device / device->host converter pipelines (ICLTensor.cc).
+  void writeConvertInit(tflite::gpu::TensorObjectConverterBuilder *converter_builder,
+                        std::shared_ptr<tflite::gpu::cl::Environment> environment);
+  void readConvertInit(tflite::gpu::TensorObjectConverterBuilder *converter_builder,
+                       std::shared_ptr<tflite::gpu::cl::Environment> environment);
+
+  TensorType get_type() { return _type; }
+  // Returns the newly assigned type (assignment expression value).
+  TensorType set_type(TensorType type) { return _type = type; }
+  // NOTE(review): returns a const copy — the `const` on a by-value return has
+  // no effect; callers pay for a TensorInfo copy on every call.
+  const TensorInfo get_info() { return _info; }
+
+public:
+  virtual const tflite::gpu::cl::Tensor *handle() const = 0;
+  virtual tflite::gpu::cl::Tensor *handle() = 0;
+
+private:
+protected:
+  size_t _rank; // Actual rank (reflects extended rank)
+  TensorType _type;
+  TensorInfo _info;
+  // Intermediate device memory used by the converter pipelines.
+  tflite::gpu::cl::CLMemory _cl_memory;
+  std::shared_ptr<tflite::gpu::cl::Environment> _environment;
+  // Converters built by write/readConvertInit; roles are relative to the CPU
+  // side (to = towards device intermediate on write, towards CPU on read).
+  std::unique_ptr<tflite::gpu::TensorObjectConverter> _converter_to;
+  std::unique_ptr<tflite::gpu::TensorObjectConverter> _converter_from;
+};
+
+} // namespace operand
+} // namespace gpu_cl
+} // namespace backend
+} // namespace onert
+
+#endif // __ONERT_BACKEND_GPU_CL_OPERAND_I_CL_TENSOR_H__
|