diff options
Diffstat (limited to 'contrib/labs/opencl_test/src/opencl_test.cc')
-rw-r--r-- | contrib/labs/opencl_test/src/opencl_test.cc | 397 |
1 file changed, 0 insertions(+), 397 deletions(-)
diff --git a/contrib/labs/opencl_test/src/opencl_test.cc b/contrib/labs/opencl_test/src/opencl_test.cc deleted file mode 100644 index 93994ae43..000000000 --- a/contrib/labs/opencl_test/src/opencl_test.cc +++ /dev/null @@ -1,397 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/******************************************************************************* - * Copyright (c) 2008-2015 The Khronos Group Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and/or associated documentation files (the - * "Materials"), to deal in the Materials without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Materials, and to - * permit persons to whom the Materials are furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Materials. - * - * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. - ******************************************************************************/ - -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/CL/OpenCLEx.h" - -#include <iostream> -#include <vector> - -void printDeviceInfo(int n, cl::Device &device, cl::Device &default_device) -{ - bool is_default = (device() == default_device()); - std::cout << "\t\t\t#" << n << " Device: (id: " << device() << ") " - << (is_default ? " -> default" : "") << "\n"; - - const auto name = device.getInfo<CL_DEVICE_NAME>(); - std::cout << "\t\t\t\tName: " << name << "\n"; - - const auto compute_unit = device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(); - std::cout << "\t\t\t\tMax Compute Unit: " << compute_unit << "\n"; - - const auto max_work_item_size = device.getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>(); - std::cout << "\t\t\t\tMax Work Item Size: ["; - for (auto size : max_work_item_size) - std::cout << size << ","; - std::cout << "]\n"; - - const auto max_work_group_size = device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>(); - std::cout << "\t\t\t\tMax Work Grpup Size: " << max_work_group_size << "\n"; - - const auto max_clock_frequency = device.getInfo<CL_DEVICE_MAX_CLOCK_FREQUENCY>(); - std::cout << "\t\t\t\tMax Clock Frequency: " << max_clock_frequency << "\n"; - - std::cout << "\n"; -} - - -class OpenCLGpu -{ - public: - cl::Platform platform_; - cl::Context context_; - cl::vector<cl::Device> devices_; - std::vector<cl::CommandQueue*> q_; - cl::Program program_; - - OpenCLGpu() - { - cl_int cl_error; - - platform_ = cl::Platform::getDefault(); - - try - { - cl_context_properties properties[3] = { - CL_CONTEXT_PLATFORM, (cl_context_properties)platform_(), 0 - }; - - context_ = cl::Context(CL_DEVICE_TYPE_GPU, 
properties, NULL, NULL, &cl_error); - } - catch (cl::Error &err) // thrown when there is no Context for this platform - { - std::cout << "\t\t No Context Found\n"; - return; - } - - devices_ = context_.getInfo<CL_CONTEXT_DEVICES>(); - - for (int dev_id = 0; dev_id < devices_.size(); dev_id++) - { - cl::CommandQueue* que = new cl::CommandQueue(context_, devices_[dev_id]); - q_.emplace_back(que); - } - } - - ~OpenCLGpu() - { - for (auto each_q : q_) - delete each_q; - } - - void buildProgram(std::string& kernel_source_code) - { - std::vector<std::string> programStrings {kernel_source_code}; - - program_ = cl::Program(context_, programStrings); - - try - { - program_.build("-cl-std=CL1.2"); - } - catch (cl::Error &err) - { - cl_int buildErr = CL_SUCCESS; - auto buildInfo = program_.getBuildInfo<CL_PROGRAM_BUILD_LOG>(&buildErr); - for (auto &pair : buildInfo) { - std::cerr << pair.second << std::endl << std::endl; - } - } - } -}; - - -void checkContextMem() -{ - cl_int cl_error; - - // get context, devices - // - std::cout << "\nChecking if devices in GPU shares the same memory address:\n\n"; - - OpenCLGpu gpu; - - std::cout << "\nDevices in GPU:\n\n"; - - auto &devices = gpu.devices_; - auto default_device = cl::Device::getDefault(); - - int d = 0; - for (auto device : devices) - printDeviceInfo(++d, device, default_device); - - if (d < 2) - { - std::cout << "\t\t This options works when there are n (>= 2) devices.\n"; - return; - } - - // allocate and map memory - - typedef cl_int T; - const int items_per_device = 128; - const int length = items_per_device * devices.size(); - - std::vector<T> input(length); - std::vector<T> output(length, 0); - - for (int i = 0; i < length; i++) - input[i] = i; - - cl::Buffer input_buf(gpu.context_, (cl_mem_flags)CL_MEM_USE_HOST_PTR, length*sizeof(T), input.data(), &cl_error); - cl::Buffer output_buf(gpu.context_, (cl_mem_flags)CL_MEM_USE_HOST_PTR, length*sizeof(T), output.data(), &cl_error); - - // compile test cl code - - 
std::string kernel_source { - "typedef int T; \n" \ - "kernel void memory_test( \n" \ - " const int dev_id, \n" \ - " global T* input, \n" \ - " global T* output, \n" \ - " const int start_idx, \n" \ - " const int count) \n" \ - "{ \n" \ - " int input_idx = get_global_id(0); \n" \ - " if(input_idx < count) \n" \ - " { \n" \ - " int output_idx = start_idx + input_idx; \n" \ - " output[output_idx] = input[input_idx] + dev_id; \n" \ - " } \n" \ - "} \n" - }; - - gpu.buildProgram(kernel_source); - - try - { - auto kernel_functor = cl::KernelFunctor<cl_int, cl::Buffer, cl::Buffer, cl_int, cl_int> - (gpu.program_, "memory_test"); // name should be same as cl function name - - // create a queue per device and queue a kernel job - - for (int dev_id = 0; dev_id < devices.size(); dev_id++) - { - kernel_functor( - cl::EnqueueArgs( - *(gpu.q_[dev_id]), - cl::NDRange(items_per_device)), - (cl_int)dev_id, // dev id - input_buf, - output_buf, - (cl_int)(items_per_device * dev_id), // start index - (cl_int)(items_per_device), // count - cl_error - ); - } - - // sync - - for (d = 0; d < devices.size(); d++) - (gpu.q_[d])->finish(); - - // check if memory state changed by all devices - - cl::copy(*(gpu.q_[0]), output_buf, begin(output), end(output)); - - bool use_same_memory = true; - - for (int dev_id = 0; dev_id < devices.size(); dev_id++) - { - for (int i = 0; i < items_per_device; ++i) - { - int output_idx = items_per_device * dev_id + i; - if (output[output_idx] != input[i] + dev_id) - { - std::cout << "Output[" << output_idx << "] : " - << "expected = " << input[i] + dev_id - << "; actual = " << output[output_idx] << "\n"; - use_same_memory = false; - break; - } - } - } - - if (use_same_memory) - std::cout << "\n=> Mapped memory addresses used by devices in GPU are same.\n\n"; - else - std::cout << "\n=> Mapped memory addresses used by devices in GPU are different.\n\n"; - } - catch (cl::Error &err) - { - std::cerr << "error: code: " << err.err() << ", what: " << err.what() << 
std::endl; - } -} - -void printHelp() -{ - std::cout << "opencl information: \n\n"; - std::cout << "\t -h : help\n"; - std::cout << "\t -g : print if memory map is shared among devices in GPU (in default platform)\n\n"; - std::cout << "\t -s : test for synchronized work by two devices in a GPU\n\n"; -} - -#include <mutex> -#include <chrono> -#include <thread> -#include <condition_variable> - -#define MAX_DEVICE_NUM 8 // just for testing - -int kernel_idx[MAX_DEVICE_NUM]; -unsigned char kernel_completed = 0x00; // bit 0 = 1 means kernel by device[0] was completed. -unsigned char kernel_completed_flag; // if comparing kernel_completed with this var, all kernels are completed -int device_num; -std::mutex kernel_complete_handler_mutex; - -std::condition_variable wakeup_main; -std::mutex wakeup_main_mutex; - -void notifyKernelFinished(cl_event ev, cl_int ev_info, void * device_idx) -{ - std::cout << "callback from device[" << *((int*)device_idx) << "] : ==> completed.\n"; - - std::unique_lock<std::mutex> lock(kernel_complete_handler_mutex); - - kernel_completed |= 0x01 << *((int*)device_idx); - if (kernel_completed == kernel_completed_flag) - wakeup_main.notify_one(); -} - -void testSync() -{ - OpenCLGpu gpu; - - cl_int cl_error; - typedef cl_int T; - const int items_per_device = 1024*768; - const int length = items_per_device * gpu.devices_.size(); - - std::vector<T> output(length, 0); - - cl::Buffer output_buf(gpu.context_, (cl_mem_flags)CL_MEM_USE_HOST_PTR, length*sizeof(T), output.data(), &cl_error); - - std::string kernel_source { - "kernel void test(global float* output, const int count) \n" \ - "{ \n" \ - " int idx = get_global_id(0); \n" \ - " if(idx < count) \n" \ - " { \n" \ - " float x = hypot(idx/1.111, idx*1.111); \n" \ - " for (int y = 0; y < 200; y++) \n" \ - " x = rootn(log(pown(rootn(log(pown(x, 20)), 5), 20)), 5); \n" \ - " output[idx] = x; \n" \ - " } \n" \ - "} \n" - }; - - gpu.buildProgram(kernel_source); - - try - { - auto kernel_functor = 
cl::KernelFunctor<cl::Buffer, cl_int> - (gpu.program_, "test"); // name should be same as cl function name - - // variable init - cl::Event ev[MAX_DEVICE_NUM]; - - device_num = gpu.devices_.size(); - - kernel_completed = 0; - kernel_completed_flag = 0; - for (int i = 0; i < device_num; i++) - { - kernel_idx[i] = i; - kernel_completed_flag |= 0x01 << i; - } - - // create a queue per device and queue a kernel job - // queueing with callback function - for (int dev_id = 0; dev_id < gpu.devices_.size(); dev_id++) - { - ev[dev_id] = kernel_functor( - cl::EnqueueArgs( - *(gpu.q_[dev_id]), - cl::NDRange(items_per_device)), - output_buf, - (cl_int)(items_per_device), // count - cl_error - ); - ev[dev_id].setCallback(CL_COMPLETE, notifyKernelFinished, (void*)(kernel_idx+dev_id)); - - // how to check kernel execution status - // - // auto status = ev[dev_id].getInfo<CL_EVENT_COMMAND_EXECUTION_STATUS>(); - // std::cout << "Event status = " << (status == CL_QUEUED ? "CL_QUEUED" : status == CL_SUBMITTED ? "CL_SUBMITTED" : status == CL_COMPLETE ? "CL_COMPLETE" : "unknown") - // << std::endl; - // std::cout << "Event status code = " << status << std::endl; - } - - // long wait until kernels are over - { - std::unique_lock<std::mutex> lk(wakeup_main_mutex); - wakeup_main.wait(lk, []{ return (kernel_completed == kernel_completed_flag); }); - - std::cout << "all devices were completed.\n"; - } - } - catch (cl::Error &err) - { - std::cerr << "error: code: " << err.err() << ", what: " << err.what() << std::endl; - } - -} - -int main(const int argc, char **argv) -{ - if (argc < 2) - printHelp(); - else - { - std::string option = argv[1]; - - if (option == "-h") // help - printHelp(); - else if (option == "-g") // check if devices in GPU uses same memory address - checkContextMem(); - else if (option == "-s") // check synchronization between devices in GPU - testSync(); - } - return 0; -} |