diff options
Diffstat (limited to 'runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.cc')
-rw-r--r-- | runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.cc | 359 |
1 files changed, 0 insertions, 359 deletions
diff --git a/runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.cc b/runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.cc deleted file mode 100644 index d147b7b13..000000000 --- a/runtime/onert/backend/gpu_cl/open_cl/ClCommandQueue.cc +++ /dev/null @@ -1,359 +0,0 @@ -/* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2019 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ClCommandQueue.h" - -#include <algorithm> -#include <map> -#include <string> -#include <vector> -#include <limits> - -#include "absl/strings/str_cat.h" -#include "ClDevice.h" -#include "ClEvent.h" -#include "Util.h" -#include "Types.h" -#include "Status.h" - -namespace onert -{ -namespace backend -{ -namespace gpu_cl -{ - -using namespace std; - -CLCommandQueue::CLCommandQueue(cl_command_queue queue, bool has_ownership) - : queue_(queue), has_ownership_(has_ownership) -{ -} - -CLCommandQueue::CLCommandQueue(CLCommandQueue &&queue) - : queue_(queue.queue_), has_ownership_(queue.has_ownership_) -{ - queue.queue_ = nullptr; -} - -CLCommandQueue &CLCommandQueue::operator=(CLCommandQueue &&queue) -{ - if (this != &queue) - { - Release(); - std::swap(queue_, queue.queue_); - has_ownership_ = queue.has_ownership_; - } - return *this; -} - -CLCommandQueue::~CLCommandQueue() { Release(); } - -void CLCommandQueue::Release() -{ - if (has_ownership_ && queue_) - { - clReleaseCommandQueue(queue_); - queue_ = nullptr; - } -} - -absl::Status CLCommandQueue::Dispatch(const CLKernel &kernel, const int3 &work_groups_count, - const int3 &work_group_size, CLEvent *event) -{ - std::vector<size_t> local(3); - std::vector<size_t> global(3); - for (int i = 0; i < 3; ++i) - { - local[i] = work_group_size[i]; - global[i] = work_groups_count[i] * work_group_size[i]; - } - cl_event resulting_event; - const int error_code = - clEnqueueNDRangeKernel(queue_, kernel.kernel(), 3, nullptr, global.data(), local.data(), 0, - nullptr, event ? &resulting_event : nullptr); - if (event) - { - *event = CLEvent(resulting_event); - } - if (error_code != CL_SUCCESS) - { - return absl::UnknownError( - absl::StrCat("Failed to clEnqueueNDRangeKernel - ", CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); -} - -absl::Status CLCommandQueue::Dispatch(const CLKernel &kernel, const int3 &work_groups_count, - const int3 &work_group_size) -{ - return Dispatch(kernel, work_groups_count, work_group_size, nullptr); -} - -absl::Status CLCommandQueue::EnqueueEvent(CLEvent *event) -{ - cl_event resulting_event; - const int error_code = clEnqueueMarker(queue_, &resulting_event); - *event = CLEvent(resulting_event); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError( - absl::StrCat("Failed to clEnqueueMarker - ", CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); -} - -absl::Status CLCommandQueue::EnqueueWriteImage(cl_mem memory, int3 region, const void *data) -{ - const size_t origin[] = {0, 0, 0}; - const size_t r[] = {static_cast<size_t>(region.x), static_cast<size_t>(region.y), - static_cast<size_t>(region.z)}; - auto error_code = - clEnqueueWriteImage(queue_, memory, CL_TRUE, origin, r, 0, 0, data, 0, nullptr, nullptr); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to upload data to GPU (clEnqueueWriteImage) - ", - CLErrorCodeToString(error_code))); - } - - return absl::OkStatus(); -} - -absl::Status CLCommandQueue::EnqueueReadImage(cl_mem memory, int3 region, void *data) -{ - const size_t origin[] = {0, 0, 0}; - const size_t r[] = {static_cast<size_t>(region.x), static_cast<size_t>(region.y), - static_cast<size_t>(region.z)}; - auto error_code = - clEnqueueReadImage(queue_, memory, CL_TRUE, origin, r, 0, 0, data, 0, nullptr, nullptr); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to read data from GPU (clEnqueueReadImage) - ", - CLErrorCodeToString(error_code))); - } - - return absl::OkStatus(); -} - -absl::Status CLCommandQueue::EnqueueWriteBuffer(cl_mem memory, size_t size_in_bytes, - const void *data) -{ - auto error_code = - clEnqueueWriteBuffer(queue_, memory, CL_TRUE, 0, size_in_bytes, data, 0, nullptr, nullptr); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to upload data to GPU (clEnqueueWriteBuffer) - ", - CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); -} - -absl::Status CLCommandQueue::EnqueueReadBuffer(cl_mem memory, size_t size_in_bytes, void *data) -{ - auto error_code = - clEnqueueReadBuffer(queue_, memory, CL_TRUE, 0, size_in_bytes, data, 0, nullptr, nullptr); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError(absl::StrCat("Failed to read data from GPU (clEnqueueReadBuffer) - ", - CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); -} - -absl::Status CLCommandQueue::WaitForCompletion() -{ - auto error_code = clFinish(queue_); - if (error_code != CL_SUCCESS) - { - return absl::UnknownError( - absl::StrCat("Failed to clFinish - ", CLErrorCodeToString(error_code))); - } - return absl::OkStatus(); -} - -ProfilingCommandQueue::ProfilingCommandQueue(cl_command_queue queue) : CLCommandQueue(queue, true) -{ - events_.reserve(128); -} - -ProfilingCommandQueue::ProfilingCommandQueue(ProfilingCommandQueue &&queue) - : CLCommandQueue(std::move(queue)), events_(std::move(queue.events_)), - current_label_(std::move(queue.current_label_)) -{ -} - -ProfilingCommandQueue &ProfilingCommandQueue::operator=(ProfilingCommandQueue &&queue) -{ - if (this != &queue) - { - events_ = std::move(queue.events_); - current_label_ = std::move(queue.current_label_); - CLCommandQueue::operator=(std::move(queue)); - } - return *this; -} - -void ProfilingCommandQueue::SetEventsLabel(const std::string &name) { current_label_ = name; } - -void ProfilingCommandQueue::ResetMeasurements() { events_.clear(); } - -absl::Status ProfilingCommandQueue::Dispatch(const CLKernel &kernel, const int3 &work_groups_count, - const int3 &work_group_size) -{ - events_.push_back(CLEvent()); - RETURN_IF_ERROR(CLCommandQueue::Dispatch(kernel, work_groups_count, work_group_size, - &events_[events_.size() - 1])); - events_.back().SetName(current_label_); - return absl::OkStatus(); -} - -absl::Status -ProfilingCommandQueue::GetBestWorkGroupIndex(const CLKernel &kernel, const DeviceInfo &device_info, - const std::vector<int3> &work_groups_count, - const std::vector<int3> &work_group_sizes, int *index) -{ - // Some Adreno 3xx can have wrong numbers for some events - const bool possible_bug_with_events = device_info.IsAdreno3xx(); - events_.resize(work_group_sizes.size()); - for (size_t i = 0; i < work_group_sizes.size(); ++i) - { - RETURN_IF_ERROR( - CLCommandQueue::Dispatch(kernel, work_groups_count[i], work_group_sizes[i], &events_[i])); - - // reducing the speed of memory leak on Mali for some kernels - if (device_info.IsMali() && i % 8 == 7) - { - events_[i - 7].Wait(); - } - if (possible_bug_with_events) - { - // We are trying to increase probability for correct result. - RETURN_IF_ERROR(WaitForCompletion()); - } - } - - RETURN_IF_ERROR(WaitForCompletion()); - - // To release memory of some kernel pool on Mali. - if (device_info.IsMali()) - { - RETURN_IF_ERROR(kernel.ReInit()); - } - - int minimum_index = 0; - double minimum_time = std::numeric_limits<double>::max(); - if (possible_bug_with_events) - { // we will try to cut out suspicious results - double average_time = 0.0; - int average_samples_count = 0; - for (size_t i = 0; i < work_group_sizes.size(); ++i) - { - if (events_[i].GetEventTimeMs() < 100 * 1000) - { // 100 sec - average_time += events_[i].GetEventTimeMs(); - average_samples_count++; - } - } - if (average_samples_count == 0) - { - throw std::runtime_error("It cannot be divided by zero"); - } - else - { - average_time /= average_samples_count; - } - - for (size_t i = 0; i < work_group_sizes.size(); ++i) - { - double time = events_[i].GetEventTimeMs(); - if (time < minimum_time && time >= 0.1 * average_time) - { - minimum_index = i; - minimum_time = time; - } - } - } - else - { - for (size_t i = 0; i < work_group_sizes.size(); ++i) - { - double time = events_[i].GetEventTimeMs(); - if (time < minimum_time) - { - minimum_index = i; - minimum_time = time; - } - } - } - - *index = minimum_index; - - return absl::OkStatus(); -} - -absl::Status CreateCLCommandQueue(const CLDevice &device, const CLContext &context, - CLCommandQueue *result) -{ - int error_code; - cl_command_queue queue = clCreateCommandQueue(context.context(), device.id(), 0, &error_code); - if (!queue) - { - return absl::UnknownError( - absl::StrCat("Failed to create a command queue - ", CLErrorCodeToString(error_code))); - } - *result = CLCommandQueue(queue, true); - return absl::OkStatus(); -} - -double ProfilingCommandQueue::GetQueueExecutionTimeMs() const -{ - const uint64_t start = events_.front().GetStartedTimeNs(); - const uint64_t end = events_.back().GetFinishedTimeNs(); - const uint64_t time_ns = (end - start); - - return static_cast<double>(time_ns) / 1000000.0; -} - -double ProfilingCommandQueue::GetSumOfEventsTimeMs() const -{ - double sum = 0.0; - for (uint32_t i = 0; i < events_.size(); ++i) - { - sum += events_[i].GetEventTimeMs(); - } - return sum; -} - -absl::Status CreateProfilingCommandQueue(const CLDevice &device, const CLContext &context, - ProfilingCommandQueue *result) -{ - int error_code; - cl_command_queue queue = - clCreateCommandQueue(context.context(), device.id(), CL_QUEUE_PROFILING_ENABLE, &error_code); - if (!queue) - { - return absl::UnknownError( - absl::StrCat("Failed to create a command queue - ", CLErrorCodeToString(error_code))); - } - - *result = ProfilingCommandQueue(queue); - return absl::OkStatus(); -} - -} // namespace gpu_cl -} // namespace backend -} // namespace onert |