diff options
Diffstat (limited to 'libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp')
-rw-r--r-- | libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp | 409 |
1 files changed, 0 insertions, 409 deletions
diff --git a/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp b/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp deleted file mode 100644 index 05ecdeb22..000000000 --- a/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp +++ /dev/null @@ -1,409 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Utils.h" - -#include <algorithm> -#include <fstream> -#include <iostream> -#include <utility> -#include <vector> - -using namespace arm_compute; - -const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map = { - // ARMComputeEx kernels - {"activation_layer_ex", "activation_layer_ex.cl"}, - {"arg_op", "arg_operation.cl"}, - {"arithmetic_sub_ex", "arithmetic_op_ex.cl"}, - {"arithmetic_add_qasymm8", "arithmetic_op_quantized.cl"}, - {"batch_to_space_nd", "batch_to_space_nd.cl"}, - {"binary_logical_op", "binary_logical_op.cl"}, - {"cast", "cast.cl"}, - {"cast_qasymm_in", "cast.cl"}, - {"cast_qasymm_out", "cast.cl"}, - {"comparison_op", "comparison_op.cl"}, - {"comparison_op_qasymm8", "comparison_op_quantized.cl"}, - {"depth_to_space", "depth_to_space.cl"}, - {"embedding_lookup", "embedding_lookup.cl"}, - {"exp_layer", "exp.cl"}, - {"gather", "gather.cl"}, - {"gather_1d", "gather.cl"}, - {"gather_1d_out", "gather.cl"}, - {"hashtable_lookup", "hashtable_lookup.cl"}, - {"neg_tensor", "neg_tensor.cl"}, - {"pad", "pad.cl"}, - {"permute_generic", "permute_ex.cl"}, - {"pixelwise_mul_qasymm8", "pixelwise_mul_quantized.cl"}, - {"pixelwise_div_float", "pixelwise_div_float.cl"}, - {"pixelwise_div_int", "pixelwise_div_int.cl"}, - {"prelu", "prelu.cl"}, - {"prelu_qasymm8", "prelu_quantized.cl"}, - {"reduce_min_max", "reduce_operation.cl"}, - {"reduce_sum_mean", "reduce_operation.cl"}, - {"squared_difference", "squared_difference.cl"}, - {"strided_slice_ex", "strided_slice_ex.cl"}, - {"topkv2_init", "topkv2.cl"}, - {"topkv2_find_first_negative", "topkv2.cl"}, - {"topkv2_reorder_negatives", "topkv2.cl"}, - {"topkv2_store", "topkv2.cl"}, - {"radixsort_histogram", "topkv2_radixsort.cl"}, - {"radixsort_scanhistograms", "topkv2_radixsort.cl"}, - {"radixsort_pastehistograms", "topkv2_radixsort.cl"}, - {"radixsort_reorder", "topkv2_radixsort.cl"}, - {"topkv2_quicksort", "topkv2_quicksort.cl"}, - {"space_to_batch_4d_nchw", "space_to_batch.cl"}, - {"space_to_batch_4d_nhwc", "space_to_batch.cl"}, - {"space_to_depth", "space_to_depth.cl"}, -}; - -const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map = { -#ifdef EMBEDDED_KERNELS - { - "activation_layer_ex.cl", -#include "./cl_kernels/activation_layer_ex.clembed" - }, - { - "arg_operation.cl", -#include "./cl_kernels/arg_operation.clembed" - }, - { - "arithmetic_op_ex.cl", -#include "./cl_kernels/arithmetic_op_ex.clembed" - }, - { - "batch_to_space_nd.cl", -#include "./cl_kernels/batch_to_space_nd.clembed" - }, - { - "cast.cl", -#include "./cl_kernels/cast.clembed" - }, - { - "comparison_op.cl", -#include "./cl_kernels/comparison_op.clembed" - }, - { - "comparison_op_quantized.cl", -#include "./cl_kernels/comparison_op_quantized.clembed" - }, - { - "embedding_lookup.cl", -#include "./cl_kernels/embedding_lookup.clembed" - }, - { - "depth_to_space.cl", -#include "./cl_kernels/depth_to_space.clembed" - }, - { - "exp.cl", -#include "./cl_kernels/exp.clembed" - }, - { - "gather.cl", -#include "./cl_kernels/gather.clembed" - }, - { - "hashtable_lookup.cl", -#include "./cl_kernels/hashtable_lookup.clembed" - }, - { - "helpers.h", -#include "./cl_kernels/helpers.hembed" - }, - { - "helpers_asymm.h", -#include "./cl_kernels/helpers_asymm.hembed" - }, - { - "binary_logical_op.cl", -#include "./cl_kernels/binary_logical_op.clembed" - }, - { - "neg_tensor.cl", -#include "./cl_kernels/neg_tensor.clembed" - }, - { - "pad.cl", -#include "./cl_kernels/pad.clembed" - }, - { - "pixelwise_div_float.cl", -#include "./cl_kernels/pixelwise_div_float.clembed" - }, - { - "pixelwise_div_int.cl", -#include "./cl_kernels/pixelwise_div_int.clembed" - }, - { - "prelu.cl", -#include "./cl_kernels/prelu.clembed" - }, - { - "prelu_quantized.cl", -#include "./cl_kernels/prelu_quantized.clembed" - }, - { - "reduce_operation.cl", -#include "./cl_kernels/reduce_operation.clembed" - }, - { - "space_to_batch.cl", -#include "./cl_kernels/space_to_batch.clembed" - }, - { - "space_to_depth.cl", -#include "./cl_kernels/space_to_depth.clembed" - }, - { - "squared_difference.cl", -#include "./cl_kernels/squared_difference.clembed" - }, - { - "strided_slice_ex.cl", -#include "./cl_kernels/strided_slice_ex.clembed" - }, - { - "topkv2.cl", -#include "./cl_kernels/topkv2.clembed" - }, - { - "topkv2_radixsort.cl", -#include "./cl_kernels/topkv2_radixsort.clembed" - }, - { - "topkv2_quicksort.cl", -#include "./cl_kernels/topkv2_quicksort.clembed" - }, - { - "permute_ex.cl", -#include "./cl_kernels/permute_ex.clembed" - }, - -#endif /* EMBEDDED_KERNELS */ -}; - -CLKernelLibraryEx::CLKernelLibraryEx() - : _context(), _device(), _kernel_path("."), _programs_map(), _built_programs_map() -{ - opencl_is_available(); // Make sure the OpenCL symbols are initialised *before* the - // CLKernelLibraryEx is built -} - -CLKernelLibraryEx &CLKernelLibraryEx::get() -{ - static CLKernelLibraryEx _kernel_library; - return _kernel_library; -} - -Kernel CLKernelLibraryEx::create_kernel(const std::string &kernel_name, - const StringSet &build_options_set) const -{ - // Find which program contains the kernel - auto kernel_program_it = _kernel_program_map.find(kernel_name); - - if (_kernel_program_map.end() == kernel_program_it) - { - ARM_COMPUTE_ERROR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str()); - } - std::string concat_str; - - if (fp16_supported()) - { - concat_str += " -DARM_COMPUTE_OPENCL_FP16_ENABLED=1 "; - } - - if (get_cl_version(_device) == CLVersion::CL20) - { - concat_str += " -cl-std=CL2.0 "; - } - else if (arm_non_uniform_workgroup_supported(_device)) - { - concat_str += " -cl-arm-non-uniform-work-group-size "; - } - else - { - ARM_COMPUTE_ERROR("Non uniform workgroup size is not supported!!"); - } - - // Check if the program has been built before with same build options. - const std::string program_name = kernel_program_it->second; - const std::string build_options = stringify_set(build_options_set) + concat_str; - - const std::string built_program_name = program_name + "_" + build_options; - auto built_program_it = _built_programs_map.find(built_program_name); - - cl::Program cl_program; - - if (_built_programs_map.end() != built_program_it) - { - // If program has been built, retrieve to create kernel from it - cl_program = built_program_it->second; - } - else - { - // Get program - Program program = load_program(program_name); - - // Build program - cl_program = program.build(build_options); - - // Add built program to internal map - _built_programs_map.emplace(built_program_name, cl_program); - } - - // Create and return kernel - return Kernel(kernel_name, cl_program); -} - -void CLKernelLibraryEx::add_built_program(const std::string &built_program_name, - cl::Program program) -{ - _built_programs_map.emplace(built_program_name, program); -} - -bool CLKernelLibraryEx::fp16_supported() const { return ::fp16_supported(_device); } - -bool CLKernelLibraryEx::int64_base_atomics_supported() const -{ - return device_supports_extension(_device, "cl_khr_int64_base_atomics"); -} - -const Program &CLKernelLibraryEx::load_program(const std::string &program_name) const -{ - const auto program_it = _programs_map.find(program_name); - - if (program_it != _programs_map.end()) - { - return program_it->second; - } - - Program program; - -#ifdef EMBEDDED_KERNELS - const auto program_source_it = _program_source_map.find(program_name); - - if (_program_source_map.end() == program_source_it) - { - ARM_COMPUTE_ERROR("Embedded program for %s does not exist.", program_name.c_str()); - } - - program = Program(_context, program_name, program_source_it->second); -#else /* EMBEDDED_KERNELS */ - // Check for binary - std::string source_name = _kernel_path + program_name; - std::string binary_name = source_name + "bin"; - - if (std::ifstream(binary_name).is_open()) - { - const std::string program_binary = read_file(binary_name, true); - program = Program(_context, _device, program_name, - std::vector<unsigned char>(program_binary.begin(), program_binary.end())); - } - else if (std::ifstream(source_name).is_open()) - { - program = Program(_context, program_name, read_file(source_name, false)); - } - else - { - ARM_COMPUTE_ERROR("Kernel file %s does not exist.", source_name.c_str()); - } -#endif /* EMBEDDED_KERNELS */ - - // Insert program to program map - const auto new_program = _programs_map.emplace(program_name, std::move(program)); - - return new_program.first->second; -} - -std::string CLKernelLibraryEx::stringify_set(const StringSet &s) const -{ - std::string concat_set; - -#ifndef EMBEDDED_KERNELS - concat_set += "-I" + _kernel_path + " "; -#endif /* EMBEDDED_KERNELS */ - - // Concatenate set - for (const auto &el : s) - { - concat_set += " " + el; - } - - return concat_set; -} - -std::string CLKernelLibraryEx::get_program_source(const std::string &program_name) -{ - const auto program_source_it = _program_source_map.find(program_name); - - if (program_source_it == _program_source_map.end()) - { - ARM_COMPUTE_ERROR("Embedded program for %s does not exist.", program_name.c_str()); - } - - return program_source_it->second; -} - -size_t CLKernelLibraryEx::max_local_workgroup_size(const cl::Kernel &kernel) const -{ - size_t result; - - size_t err = kernel.getWorkGroupInfo(_device, CL_KERNEL_WORK_GROUP_SIZE, &result); - ARM_COMPUTE_ERROR_ON_MSG( - err != 0, - "clGetKernelWorkGroupInfo failed to return the maximum workgroup size for the kernel"); - ARM_COMPUTE_UNUSED(err); - - return result; -} - -cl::NDRange CLKernelLibraryEx::default_ndrange() const -{ - // GPUTarget _target = get_target_from_device(_device); - cl::Device device = cl::Device::getDefault(); - GPUTarget _target = get_target_from_device(device); - cl::NDRange default_range; - - switch (_target) - { - case GPUTarget::MIDGARD: - case GPUTarget::T600: - case GPUTarget::T700: - case GPUTarget::T800: - default_range = cl::NDRange(128u, 1); - break; - default: - default_range = cl::NullRange; - } - - return default_range; -} - -std::string CLKernelLibraryEx::get_device_version() { return _device.getInfo<CL_DEVICE_VERSION>(); } |