summary | refs | log | tree | commit | diff
path: root/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp')
-rw-r--r-- libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp 360
1 files changed, 111 insertions, 249 deletions
diff --git a/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp b/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
index d535c5da4..05ecdeb22 100644
--- a/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
+++ b/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
@@ -38,255 +38,37 @@
using namespace arm_compute;
const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map = {
- {"absdiff", "absdiff.cl"},
- {"accumulate", "accumulate.cl"},
- {"accumulate_squared", "accumulate.cl"},
- {"accumulate_weighted", "accumulate.cl"},
- {"activation_layer", "activation_layer.cl"},
- {"activation_layer_qa8", "activation_layer_qa8.cl"},
- {"activation_layer_logistic_qa8", "activation_layer_qa8.cl"},
- {"arithmetic_add", "arithmetic_op.cl"},
- {"arithmetic_sub", "arithmetic_op.cl"},
+ // ARMComputeEx kernels
+ {"activation_layer_ex", "activation_layer_ex.cl"},
+ {"arg_op", "arg_operation.cl"},
+ {"arithmetic_sub_ex", "arithmetic_op_ex.cl"},
{"arithmetic_add_qasymm8", "arithmetic_op_quantized.cl"},
- {"batchnormalization_layer_nchw", "batchnormalization_layer.cl"},
- {"batchnormalization_layer_nhwc", "batchnormalization_layer.cl"},
- {"bitwise_or", "bitwise_op.cl"},
- {"bitwise_and", "bitwise_op.cl"},
- {"bitwise_xor", "bitwise_op.cl"},
- {"bitwise_not", "bitwise_op.cl"},
+ {"batch_to_space_nd", "batch_to_space_nd.cl"},
+ {"binary_logical_op", "binary_logical_op.cl"},
{"cast", "cast.cl"},
{"cast_qasymm_in", "cast.cl"},
{"cast_qasymm_out", "cast.cl"},
- {"channel_combine_NV", "channel_combine.cl"},
- {"channel_combine_RGB888", "channel_combine.cl"},
- {"channel_combine_RGBA8888", "channel_combine.cl"},
- {"channel_combine_UYVY422", "channel_combine.cl"},
- {"channel_combine_YUYV422", "channel_combine.cl"},
- {"channel_shuffle_nchw", "channel_shuffle.cl"},
- {"channel_extract_NV12", "channel_extract.cl"},
- {"channel_extract_NV21", "channel_extract.cl"},
- {"channel_extract_RGB888", "channel_extract.cl"},
- {"channel_extract_RGBA8888", "channel_extract.cl"},
- {"channel_extract_UYVY422", "channel_extract.cl"},
- {"channel_extract_YUYV422", "channel_extract.cl"},
- {"combine_gradients_L1", "canny.cl"},
- {"combine_gradients_L2", "canny.cl"},
- {"concatenate_depth", "concatenate.cl"},
- {"concatenate_width", "concatenate.cl"},
- {"convolution_rectangle", "convolution_rectangle.cl"},
- {"col2im", "col2im.cl"},
- {"convert_depth_down", "depth_convert.cl"},
- {"convert_depth_up", "depth_convert.cl"},
- {"convert_fc_weights", "convert_fc_weights.cl"},
- {"convolution3x3_static", "convolution3x3.cl"},
- {"convolution5x5_static", "convolution5x5.cl"},
- {"convolution7x7_static", "convolution7x7.cl"},
- {"convolution9x9_static", "convolution9x9.cl"},
- {"convolution_separable1x5_static", "convolution5x5.cl"},
- {"convolution_separable5x1_static", "convolution5x5.cl"},
- {"convolution_separable1x7_static", "convolution7x7.cl"},
- {"convolution_separable7x1_static", "convolution7x7.cl"},
- {"convolution_separable1x9_static", "convolution9x9.cl"},
- {"convolution_separable9x1_static", "convolution9x9.cl"},
- {"copy_tensor", "copy_tensor.cl"},
- {"copy_plane", "channel_extract.cl"},
- {"copy_planes_3p", "channel_combine.cl"},
- {"copy_to_keypoint", "fast_corners.cl"},
- {"deconvolution_upsample", "deconvolution_layer.cl"},
- {"depthwise_convolution_3x3", "depthwise_convolution.cl"},
- {"depthwise_convolution_3x3_f16", "depthwise_convolution.cl"},
- {"depthwise_convolution_3x3_quantized_nchw", "depthwise_convolution_quantized.cl"},
- {"depthwise_convolution_3x3_quantized_nhwc_stride1", "depthwise_convolution_quantized.cl"},
- {"depthwise_convolution_3x3_quantized_nhwc_stride2", "depthwise_convolution_quantized.cl"},
- {"depthwise_convolution_3x3_stridex1_stridey1_bifrost_f16", "depthwise_convolution.cl"},
- {"depthwise_convolution_3x3_stridex2_stridey2_bifrost_f16", "depthwise_convolution.cl"},
- {"depthwise_convolution_3x3_stridex1_stridey1_bifrost_f32", "depthwise_convolution.cl"},
- {"depthwise_convolution_3x3_stridex2_stridey2_bifrost_f32", "depthwise_convolution.cl"},
- {"depthwise_im2col", "depthwise_convolution.cl"},
- {"depthwise_vector_to_tensor", "depthwise_convolution.cl"},
- {"depthwise_weights_reshape", "depthwise_convolution.cl"},
- {"dequantization_layer", "dequantization_layer.cl"},
- {"derivative", "derivative.cl"},
- {"dilate", "dilate.cl"},
- {"direct_convolution1x1", "direct_convolution1x1.cl"},
- {"direct_convolution1x1_f32_bifrost", "direct_convolution1x1.cl"},
- {"direct_convolution3x3", "direct_convolution3x3.cl"},
- {"direct_convolution3x3_f32_bifrost", "direct_convolution3x3.cl"},
- {"direct_convolution5x5", "direct_convolution5x5.cl"},
- {"direct_convolution5x5_f32_bifrost", "direct_convolution5x5.cl"},
- {"direct_convolution_1x1_3x3_5x5_quantized", "direct_convolution_1x1_3x3_5x5_quantized.cl"},
- {"erode", "erode.cl"},
- {"fast_corners", "fast_corners.cl"},
- {"fill_image_borders_constant", "fill_border.cl"},
- {"fill_image_borders_replicate", "fill_border.cl"},
- {"finalize", "optical_flow_pyramid_lk.cl"},
- {"floor_layer", "floor.cl"},
+ {"comparison_op", "comparison_op.cl"},
+ {"comparison_op_qasymm8", "comparison_op_quantized.cl"},
+ {"depth_to_space", "depth_to_space.cl"},
+ {"embedding_lookup", "embedding_lookup.cl"},
+ {"exp_layer", "exp.cl"},
{"gather", "gather.cl"},
{"gather_1d", "gather.cl"},
{"gather_1d_out", "gather.cl"},
- {"gaussian1x5_sub_x", "gaussian_pyramid.cl"},
- {"gaussian5x1_sub_y", "gaussian_pyramid.cl"},
- {"gemm_accumulate_biases", "gemm.cl"},
- {"gemm_interleave4x4", "gemm.cl"},
- {"gemm_ma_f16", "gemm.cl"},
- {"gemm_ma_f32", "gemm.cl"},
- {"gemm_ma_qs8", "gemm.cl"},
- {"gemm_ma_qs16", "gemm.cl"},
- {"gemm_mv", "gemv.cl"},
- {"gemm_mv_quantized", "gemv.cl"},
- {"gemm_mm_interleaved_transposed_f16", "gemm.cl"},
- {"gemm_mm_interleaved_transposed_f16_bifrost", "gemm.cl"},
- {"gemm_mm_interleaved_transposed_f32", "gemm.cl"},
- {"gemm_mm_interleaved_transposed_f32_bifrost", "gemm.cl"},
- {"gemm_mm_interleaved_transposed_qs8", "gemm.cl"},
- {"gemm_mm_interleaved_transposed_qs16", "gemm.cl"},
- {"gemm_mm_floating_point", "gemm.cl"},
- {"gemm_mm_floating_point_f16_bifrost", "gemm.cl"},
- {"gemm_mm_floating_point_f32_bifrost", "gemm.cl"},
- {"gemm_mm_floating_point_f32_bifrost_1000", "gemm.cl"},
- {"gemm_mm_qs8", "gemm.cl"},
- {"gemm_mm_qs16", "gemm.cl"},
- {"gemm_lc_vm_f32", "gemm.cl"},
- {"gemm_transpose1xW", "gemm.cl"},
- {"gemmlowp_matrix_a_reduction", "gemmlowp.cl"},
- {"gemmlowp_matrix_b_reduction", "gemmlowp.cl"},
- {"gemmlowp_mm_bifrost", "gemmlowp.cl"},
- {"gemmlowp_mm_midgard", "gemmlowp.cl"},
- {"gemmlowp_mm_interleaved_transposed_bifrost", "gemmlowp.cl"},
- {"gemmlowp_mm_interleaved_transposed_midgard", "gemmlowp.cl"},
- {"gemmlowp_offset_contribution", "gemmlowp.cl"},
- {"gemmlowp_output_stage_quantize_down", "gemmlowp.cl"},
- {"gemmlowp_output_stage_quantize_down_fixedpoint", "gemmlowp.cl"},
- {"harris_score_3x3", "harris_corners.cl"},
- {"harris_score_5x5", "harris_corners.cl"},
- {"harris_score_7x7", "harris_corners.cl"},
- {"hist_border_kernel", "histogram.cl"},
- {"hist_border_kernel_fixed", "histogram.cl"},
- {"hist_local_kernel", "histogram.cl"},
- {"hist_local_kernel_fixed", "histogram.cl"},
- {"hog_block_normalization", "hog.cl"},
- {"hog_detector", "hog.cl"},
- {"hog_orientation_binning", "hog.cl"},
- {"hysteresis", "canny.cl"},
- {"im2col1x1_stridex1_dchw", "im2col.cl"},
- {"im2col3x3_dchw", "im2col.cl"},
- {"im2col5x5_dchw", "im2col.cl"},
- {"im2col11x11_padx0_pady0_dchw", "im2col.cl"},
- {"im2col_generic_dchw", "im2col.cl"},
- {"im2col_generic_padx0_pady0_dchw", "im2col.cl"},
- {"im2col_reduced_dchw", "im2col.cl"},
- {"init_level", "optical_flow_pyramid_lk.cl"},
- {"init_level_max", "optical_flow_pyramid_lk.cl"},
- {"init_level_max_initial_estimate", "optical_flow_pyramid_lk.cl"},
- {"integral_horizontal", "integral_image.cl"},
- {"integral_vertical", "integral_image.cl"},
- {"IYUV_to_NV12_bt709", "color_convert.cl"},
- {"IYUV_to_RGB888_bt709", "color_convert.cl"},
- {"IYUV_to_RGBA8888_bt709", "color_convert.cl"},
- {"IYUV_to_YUV444_bt709", "color_convert.cl"},
- {"l2_normalize", "l2_normalize.cl"},
- {"lktracker_stage0", "optical_flow_pyramid_lk.cl"},
- {"lktracker_stage1", "optical_flow_pyramid_lk.cl"},
- {"magnitude_phase", "magnitude_phase.cl"},
- {"mean_stddev_accumulate", "mean_stddev.cl"},
- {"minmax", "minmaxloc.cl"},
- {"minmax_border", "minmaxloc.cl"},
- {"minmax_layer", "minmax_layer.cl"},
- {"minmaxloc", "minmaxloc.cl"},
- {"non_linear_filter_box3x3", "non_linear_filter3x3.cl"},
- {"non_linear_filter_cross3x3", "non_linear_filter3x3.cl"},
- {"non_linear_filter_disk3x3", "non_linear_filter3x3.cl"},
- {"non_linear_filter_box5x5", "non_linear_filter5x5.cl"},
- {"non_linear_filter_cross5x5", "non_linear_filter5x5.cl"},
- {"non_linear_filter_disk5x5", "non_linear_filter5x5.cl"},
- {"non_max_suppression", "nonmax.cl"},
- {"normalization_layer_cross_map", "normalization_layer.cl"},
- {"normalization_layer_in_map", "normalization_layer.cl"},
- {"NV12_to_IYUV_bt709", "color_convert.cl"},
- {"NV12_to_RGB888_bt709", "color_convert.cl"},
- {"NV12_to_RGBA8888_bt709", "color_convert.cl"},
- {"NV12_to_YUV444_bt709", "color_convert.cl"},
- {"NV21_to_IYUV_bt709", "color_convert.cl"},
- {"NV21_to_RGB888_bt709", "color_convert.cl"},
- {"NV21_to_RGBA8888_bt709", "color_convert.cl"},
- {"NV21_to_YUV444_bt709", "color_convert.cl"},
- {"output_stage_quantized", "direct_convolution_1x1_3x3_5x5_quantized.cl"},
- {"permute_201", "permute.cl"},
- {"permute_120", "permute.cl"},
- {"permute_3201", "permute.cl"},
- {"pixelwise_mul_float", "pixelwise_mul_float.cl"},
- {"pixelwise_mul_int", "pixelwise_mul_int.cl"},
+ {"hashtable_lookup", "hashtable_lookup.cl"},
+ {"neg_tensor", "neg_tensor.cl"},
+ {"pad", "pad.cl"},
+ {"permute_generic", "permute_ex.cl"},
{"pixelwise_mul_qasymm8", "pixelwise_mul_quantized.cl"},
{"pixelwise_div_float", "pixelwise_div_float.cl"},
{"pixelwise_div_int", "pixelwise_div_int.cl"},
- {"pooling_layer_2", "pooling_layer.cl"},
- {"pooling_layer_3", "pooling_layer.cl"},
- {"pooling_layer_optimized_3", "pooling_layer.cl"},
- {"pooling_layer_7", "pooling_layer.cl"},
- {"pooling_layer_MxN_nchw", "pooling_layer.cl"},
- {"pooling_layer_MxN_nhwc", "pooling_layer.cl"},
- {"pooling_layer_MxN_quantized_nhwc", "pooling_layer_quantized.cl"},
- {"pooling_layer_MxN_quantized_nchw", "pooling_layer_quantized.cl"},
- {"quantization_layer", "quantization_layer.cl"},
- {"reduce_max", "reduce_max.cl"},
- {"reduction_operation", "reduction_operation.cl"},
- {"reduction_mean", "reduction_mean.cl"},
- {"remap_nearest_neighbour", "remap.cl"},
- {"remap_bilinear", "remap.cl"},
- {"reshape_layer", "reshape_layer.cl"},
- {"reshape_to_columns", "convolution_layer.cl"},
- {"RGB888_to_IYUV_bt709", "color_convert.cl"},
- {"RGB888_to_NV12_bt709", "color_convert.cl"},
- {"RGB888_to_RGBA8888_bt709", "color_convert.cl"},
- {"RGB888_to_YUV444_bt709", "color_convert.cl"},
- {"RGBA8888_to_IYUV_bt709", "color_convert.cl"},
- {"RGBA8888_to_NV12_bt709", "color_convert.cl"},
- {"RGBA8888_to_RGB888_bt709", "color_convert.cl"},
- {"RGBA8888_to_YUV444_bt709", "color_convert.cl"},
- {"roi_pooling_layer", "roi_pooling_layer.cl"},
- {"scale_nearest_neighbour", "scale.cl"},
- {"scale_bilinear", "scale.cl"},
- {"scharr3x3", "scharr_filter.cl"},
- {"sobel3x3", "sobel_filter.cl"},
- {"sobel_separable5x1", "sobel_filter.cl"},
- {"sobel_separable1x5", "sobel_filter.cl"},
- {"sobel_separable7x1", "sobel_filter.cl"},
- {"sobel_separable1x7", "sobel_filter.cl"},
- {"softmax_layer_norm", "softmax_layer.cl"},
- {"softmax_layer_norm_quantized", "softmax_layer_quantized.cl"},
- {"softmax_layer_max_shift_exp_sum_quantized_serial", "softmax_layer_quantized.cl"},
- {"softmax_layer_max_shift_exp_sum_quantized_parallel", "softmax_layer_quantized.cl"},
- {"softmax_layer_max_shift_exp_sum_serial", "softmax_layer.cl"},
- {"softmax_layer_max_shift_exp_sum_parallel", "softmax_layer.cl"},
- {"strided_slice", "strided_slice.cl"},
- {"suppress_non_maximum", "canny.cl"},
- {"tablelookup_U8", "tablelookup.cl"},
- {"tablelookup_S16", "tablelookup.cl"},
- {"threshold_binary", "threshold.cl"},
- {"threshold_range", "threshold.cl"},
- {"transpose", "transpose.cl"},
- {"UYVY422_to_IYUV_bt709", "color_convert.cl"},
- {"UYVY422_to_NV12_bt709", "color_convert.cl"},
- {"UYVY422_to_RGB888_bt709", "color_convert.cl"},
- {"UYVY422_to_RGBA8888_bt709", "color_convert.cl"},
- {"warp_affine_nearest_neighbour", "warp_affine.cl"},
- {"warp_affine_bilinear", "warp_affine.cl"},
- {"warp_perspective_nearest_neighbour", "warp_perspective.cl"},
- {"warp_perspective_bilinear", "warp_perspective.cl"},
- {"winograd_filter_transform_2x2_3x3_nchw", "winograd.cl"},
- {"winograd_filter_transform_4x4_3x3_nchw", "winograd.cl"},
- {"winograd_filter_transform_4x4_5x5_nchw", "winograd.cl"},
- {"winograd_input_transform_4x4_5x5_stepz1_nchw", "winograd.cl"},
- {"winograd_input_transform_2x2_3x3_stepz1_nchw", "winograd.cl"},
- {"winograd_input_transform_2x2_3x3_stepz2_nchw", "winograd.cl"},
- {"winograd_input_transform_4x4_3x3_stepz1_nchw", "winograd.cl"},
- {"winograd_output_transform_2x2_3x3_nchw", "winograd.cl"},
- {"winograd_output_transform_4x4_3x3_nchw", "winograd.cl"},
- {"winograd_output_transform_4x4_5x5_nchw", "winograd.cl"},
- {"YUYV422_to_IYUV_bt709", "color_convert.cl"},
- {"YUYV422_to_NV12_bt709", "color_convert.cl"},
- {"YUYV422_to_RGB888_bt709", "color_convert.cl"},
- {"YUYV422_to_RGBA8888_bt709", "color_convert.cl"},
+ {"prelu", "prelu.cl"},
+ {"prelu_qasymm8", "prelu_quantized.cl"},
+ {"reduce_min_max", "reduce_operation.cl"},
+ {"reduce_sum_mean", "reduce_operation.cl"},
+ {"squared_difference", "squared_difference.cl"},
+ {"strided_slice_ex", "strided_slice_ex.cl"},
{"topkv2_init", "topkv2.cl"},
{"topkv2_find_first_negative", "topkv2.cl"},
{"topkv2_reorder_negatives", "topkv2.cl"},
@@ -296,23 +78,62 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map
{"radixsort_pastehistograms", "topkv2_radixsort.cl"},
{"radixsort_reorder", "topkv2_radixsort.cl"},
{"topkv2_quicksort", "topkv2_quicksort.cl"},
+ {"space_to_batch_4d_nchw", "space_to_batch.cl"},
+ {"space_to_batch_4d_nhwc", "space_to_batch.cl"},
+ {"space_to_depth", "space_to_depth.cl"},
};
const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map = {
#ifdef EMBEDDED_KERNELS
{
+ "activation_layer_ex.cl",
+#include "./cl_kernels/activation_layer_ex.clembed"
+ },
+ {
+ "arg_operation.cl",
+#include "./cl_kernels/arg_operation.clembed"
+ },
+ {
+ "arithmetic_op_ex.cl",
+#include "./cl_kernels/arithmetic_op_ex.clembed"
+ },
+ {
+ "batch_to_space_nd.cl",
+#include "./cl_kernels/batch_to_space_nd.clembed"
+ },
+ {
"cast.cl",
#include "./cl_kernels/cast.clembed"
},
{
- "fixed_point.h",
-#include "./cl_kernels/fixed_point.hembed"
+ "comparison_op.cl",
+#include "./cl_kernels/comparison_op.clembed"
+ },
+ {
+ "comparison_op_quantized.cl",
+#include "./cl_kernels/comparison_op_quantized.clembed"
+ },
+ {
+ "embedding_lookup.cl",
+#include "./cl_kernels/embedding_lookup.clembed"
+ },
+ {
+ "depth_to_space.cl",
+#include "./cl_kernels/depth_to_space.clembed"
+ },
+ {
+ "exp.cl",
+#include "./cl_kernels/exp.clembed"
},
{
"gather.cl",
#include "./cl_kernels/gather.clembed"
},
{
+ "hashtable_lookup.cl",
+#include "./cl_kernels/hashtable_lookup.clembed"
+ },
+ {
"helpers.h",
#include "./cl_kernels/helpers.hembed"
},
@@ -321,6 +142,18 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map
#include "./cl_kernels/helpers_asymm.hembed"
},
{
+ "binary_logical_op.cl",
+#include "./cl_kernels/binary_logical_op.clembed"
+ },
+ {
+ "neg_tensor.cl",
+#include "./cl_kernels/neg_tensor.clembed"
+ },
+ {
+ "pad.cl",
+#include "./cl_kernels/pad.clembed"
+ },
+ {
"pixelwise_div_float.cl",
#include "./cl_kernels/pixelwise_div_float.clembed"
},
@@ -329,16 +162,32 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map
#include "./cl_kernels/pixelwise_div_int.clembed"
},
{
- "reduce_max.cl",
-#include "./cl_kernels/reduce_max.clembed"
+ "prelu.cl",
+#include "./cl_kernels/prelu.clembed"
+ },
+ {
+ "prelu_quantized.cl",
+#include "./cl_kernels/prelu_quantized.clembed"
+ },
+ {
+ "reduce_operation.cl",
+#include "./cl_kernels/reduce_operation.clembed"
+ },
+ {
+ "space_to_batch.cl",
+#include "./cl_kernels/space_to_batch.clembed"
},
{
- "reduction_mean.cl",
-#include "./cl_kernels/reduction_mean.clembed"
+ "space_to_depth.cl",
+#include "./cl_kernels/space_to_depth.clembed"
},
{
- "strided_slice.cl",
-#include "./cl_kernels/strided_slice.clembed"
+ "squared_difference.cl",
+#include "./cl_kernels/squared_difference.clembed"
+ },
+ {
+ "strided_slice_ex.cl",
+#include "./cl_kernels/strided_slice_ex.clembed"
},
{
"topkv2.cl",
@@ -352,6 +201,11 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map
"topkv2_quicksort.cl",
#include "./cl_kernels/topkv2_quicksort.clembed"
},
+ {
+ "permute_ex.cl",
+#include "./cl_kernels/permute_ex.clembed"
+ },
+
#endif /* EMBEDDED_KERNELS */
};
@@ -359,7 +213,7 @@ CLKernelLibraryEx::CLKernelLibraryEx()
: _context(), _device(), _kernel_path("."), _programs_map(), _built_programs_map()
{
opencl_is_available(); // Make sure the OpenCL symbols are initialised *before* the
- // CLKernelLibrary is built
+ // CLKernelLibraryEx is built
}
CLKernelLibraryEx &CLKernelLibraryEx::get()
@@ -380,7 +234,7 @@ Kernel CLKernelLibraryEx::create_kernel(const std::string &kernel_name,
}
std::string concat_str;
- if (fp16_supported(_device))
+ if (fp16_supported())
{
concat_str += " -DARM_COMPUTE_OPENCL_FP16_ENABLED=1 ";
}
@@ -434,6 +288,13 @@ void CLKernelLibraryEx::add_built_program(const std::string &built_program_name,
_built_programs_map.emplace(built_program_name, program);
}
+bool CLKernelLibraryEx::fp16_supported() const { return ::fp16_supported(_device); }
+
+bool CLKernelLibraryEx::int64_base_atomics_supported() const
+{
+ return device_supports_extension(_device, "cl_khr_int64_base_atomics");
+}
+
const Program &CLKernelLibraryEx::load_program(const std::string &program_name) const
{
const auto program_it = _programs_map.find(program_name);
@@ -525,6 +386,7 @@ size_t CLKernelLibraryEx::max_local_workgroup_size(const cl::Kernel &kernel) con
cl::NDRange CLKernelLibraryEx::default_ndrange() const
{
+ // GPUTarget _target = get_target_from_device(_device);
cl::Device device = cl::Device::getDefault();
GPUTarget _target = get_target_from_device(device);
cl::NDRange default_range;