summaryrefslogtreecommitdiff
path: root/boost/compute/kernel.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'boost/compute/kernel.hpp')
-rw-r--r--boost/compute/kernel.hpp117
1 files changed, 116 insertions, 1 deletions
diff --git a/boost/compute/kernel.hpp b/boost/compute/kernel.hpp
index 097cba692e..25e15bd17f 100644
--- a/boost/compute/kernel.hpp
+++ b/boost/compute/kernel.hpp
@@ -15,10 +15,14 @@
#include <boost/assert.hpp>
#include <boost/utility/enable_if.hpp>
+#include <boost/optional.hpp>
+
+#include <boost/compute/cl_ext.hpp> // cl_khr_subgroups
#include <boost/compute/config.hpp>
-#include <boost/compute/program.hpp>
#include <boost/compute/exception.hpp>
+#include <boost/compute/program.hpp>
+#include <boost/compute/platform.hpp>
#include <boost/compute/type_traits/is_fundamental.hpp>
#include <boost/compute/detail/get_object_info.hpp>
#include <boost/compute/detail/assert_cl_success.hpp>
@@ -124,6 +128,21 @@ public:
}
}
+ #if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+ /// Creates a new kernel object based on a shallow copy of
+ /// the underlying OpenCL kernel object.
+ ///
+ /// Throws opencl_error if clCloneKernel() reports a failure.
+ ///
+ /// \opencl_version_warning{2,1}
+ ///
+ /// \see_opencl21_ref{clCloneKernel}
+ kernel clone()
+ {
+     cl_int ret = 0;
+     cl_kernel k = clCloneKernel(m_kernel, &ret);
+
+     // Report the failure here rather than handing back a wrapper
+     // around an invalid kernel handle.
+     if(ret != CL_SUCCESS)
+     {
+         BOOST_THROW_EXCEPTION(opencl_error(ret));
+     }
+
+     // NOTE(review): the second argument is presumably the "retain" flag;
+     // confirm against the kernel(cl_kernel, bool) constructor.
+     return kernel(k, false);
+ }
+ #endif // BOOST_COMPUTE_CL_VERSION_2_1
+
/// Returns a reference to the underlying OpenCL kernel object.
cl_kernel& get() const
{
@@ -208,6 +227,102 @@ public:
return detail::get_object_info<T>(clGetKernelWorkGroupInfo, m_kernel, info, device.id());
}
+ #if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+ /// Returns sub-group information for the kernel with \p device. Returns a null
+ /// optional if \p device is neither a 2.1 device nor a 2.0 device with support
+ /// for the cl_khr_subgroups extension, if \p info is not available through that
+ /// extension on a 2.0 device, or if the extension entry point cannot be
+ /// obtained from the device's platform.
+ ///
+ /// \p input points to \p input_size bytes of input data for the query.
+ ///
+ /// \opencl_version_warning{2,1}
+ /// \see_opencl21_ref{clGetKernelSubGroupInfo}
+ /// \see_opencl2_ref{clGetKernelSubGroupInfoKHR}
+ template<class T>
+ boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
+                                       const size_t input_size, const void * input) const
+ {
+     // 2.1 devices expose the query as a core API function.
+     if(device.check_version(2, 1))
+     {
+         return detail::get_object_info<T>(
+             clGetKernelSubGroupInfo, m_kernel, info, device.id(), input_size, input
+         );
+     }
+
+     // Anything older must be a 2.0 device advertising cl_khr_subgroups.
+     if(!device.check_version(2, 0) || !device.supports_extension("cl_khr_subgroups"))
+     {
+         return boost::optional<T>();
+     }
+
+     // Only CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE and CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE
+     // are supported in cl_khr_subgroups extension for 2.0 devices.
+     if(info != CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE && info != CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE)
+     {
+         return boost::optional<T>();
+     }
+
+     // The address is converted through an integer before being cast to
+     // the function pointer type, as in the original code.
+     clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_fptr =
+         reinterpret_cast<clGetKernelSubGroupInfoKHR_fn>(
+             reinterpret_cast<size_t>(
+                 device.platform().get_extension_function_address("clGetKernelSubGroupInfoKHR")
+             )
+         );
+
+     // Never call through a null entry point: treat an unresolved
+     // extension function the same as an unsupported extension.
+     if(!clGetKernelSubGroupInfoKHR_fptr)
+     {
+         return boost::optional<T>();
+     }
+
+     return detail::get_object_info<T>(
+         clGetKernelSubGroupInfoKHR_fptr, m_kernel, info, device.id(), input_size, input
+     );
+ }
+
+ /// \overload
+ ///
+ /// Queries \p info without input data: forwards to the four-argument
+ /// overload with a zero input size and a null input pointer.
+ template<class T>
+ boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info) const
+ {
+     const size_t no_input_size = 0;
+     const void * const no_input = 0;
+     return get_sub_group_info<T>(device, info, no_input_size, no_input);
+ }
+
+ /// \overload
+ ///
+ /// Passes the single value \p input as the input data of the query.
+ template<class T>
+ boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
+                                       const size_t input) const
+ {
+     const void * const input_ptr = &input;
+     return get_sub_group_info<T>(device, info, sizeof(input), input_ptr);
+ }
+ #endif // BOOST_COMPUTE_CL_VERSION_2_1
+
+ #if defined(BOOST_COMPUTE_CL_VERSION_2_0) && !defined(BOOST_COMPUTE_CL_VERSION_2_1)
+ /// Returns sub-group information for the kernel with \p device. Returns a null
+ /// optional if the cl_khr_subgroups extension is not supported by \p device, or
+ /// if the extension entry point cannot be obtained from the device's platform.
+ ///
+ /// \p input points to \p input_size bytes of input data for the query.
+ ///
+ /// \opencl_version_warning{2,0}
+ /// \see_opencl2_ref{clGetKernelSubGroupInfoKHR}
+ template<class T>
+ boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
+                                       const size_t input_size, const void * input) const
+ {
+     if(!device.check_version(2, 0) || !device.supports_extension("cl_khr_subgroups"))
+     {
+         return boost::optional<T>();
+     }
+
+     // The address is converted through an integer before being cast to
+     // the function pointer type, as in the original code.
+     clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_fptr =
+         reinterpret_cast<clGetKernelSubGroupInfoKHR_fn>(
+             reinterpret_cast<size_t>(
+                 device.platform().get_extension_function_address("clGetKernelSubGroupInfoKHR")
+             )
+         );
+
+     // Never call through a null entry point: treat an unresolved
+     // extension function the same as an unsupported extension.
+     if(!clGetKernelSubGroupInfoKHR_fptr)
+     {
+         return boost::optional<T>();
+     }
+
+     return detail::get_object_info<T>(
+         clGetKernelSubGroupInfoKHR_fptr, m_kernel, info, device.id(), input_size, input
+     );
+ }
+ #endif // defined(BOOST_COMPUTE_CL_VERSION_2_0) && !defined(BOOST_COMPUTE_CL_VERSION_2_1)
+
+ #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+ /// \overload
+ ///
+ /// Passes the elements of \p input as the input data of the query.
+ /// \p input must not be empty.
+ template<class T>
+ boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
+                                       const std::vector<size_t> &input) const
+ {
+     BOOST_ASSERT(!input.empty());
+
+     // Taking the address of input[0] is undefined for an empty vector, so
+     // when assertions are compiled out pass a null pointer instead.
+     const size_t * const data = input.empty() ? 0 : &input[0];
+     return get_sub_group_info<T>(device, info, input.size() * sizeof(size_t), data);
+ }
+ #endif // BOOST_COMPUTE_CL_VERSION_2_0
+
/// Sets the argument at \p index to \p value with \p size.
///
/// \see_opencl_ref{clSetKernelArg}