summaryrefslogtreecommitdiff
path: root/boost/compute/algorithm/fill.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'boost/compute/algorithm/fill.hpp')
-rw-r--r--boost/compute/algorithm/fill.hpp306
1 files changed, 306 insertions, 0 deletions
diff --git a/boost/compute/algorithm/fill.hpp b/boost/compute/algorithm/fill.hpp
new file mode 100644
index 0000000000..c711f46b94
--- /dev/null
+++ b/boost/compute/algorithm/fill.hpp
@@ -0,0 +1,306 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_FILL_HPP
+#define BOOST_COMPUTE_ALGORITHM_FILL_HPP
+
+#include <iterator>
+
+#include <boost/mpl/int.hpp>
+#include <boost/mpl/vector.hpp>
+#include <boost/mpl/contains.hpp>
+#include <boost/utility/enable_if.hpp>
+
+#include <boost/compute/cl.hpp>
+#include <boost/compute/system.hpp>
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/algorithm/copy.hpp>
+#include <boost/compute/async/future.hpp>
+#include <boost/compute/iterator/constant_iterator.hpp>
+#include <boost/compute/iterator/discard_iterator.hpp>
+#include <boost/compute/detail/is_buffer_iterator.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// alias so the meta-functions in this file can refer to Boost.MPL as mpl::
+namespace mpl = boost::mpl;
+
+// writes count copies of value starting at first by running copy() over
+// a pair of constant iterators positioned at indices 0 and count
+template<class BufferIterator, class T>
+inline void fill_with_copy(BufferIterator first,
+                           size_t count,
+                           const T &value,
+                           command_queue &queue)
+{
+    namespace bc = ::boost::compute;
+
+    bc::copy(bc::make_constant_iterator(value, 0),
+             bc::make_constant_iterator(value, count),
+             first,
+             queue);
+}
+
+// asynchronous variant of fill_with_copy(): hands a pair of constant
+// iterators (indices 0 and count) to copy_async() and returns its future
+template<class BufferIterator, class T>
+inline future<void> fill_async_with_copy(BufferIterator first,
+                                         size_t count,
+                                         const T &value,
+                                         command_queue &queue)
+{
+    namespace bc = ::boost::compute;
+
+    return bc::copy_async(bc::make_constant_iterator(value, 0),
+                          bc::make_constant_iterator(value, count),
+                          first,
+                          queue);
+}
+
+#if defined(CL_VERSION_1_2)
+
+// meta-function returning true if Iterator points to a range of values
+// that can be filled using clEnqueueFillBuffer(). to meet this criterion
+// it must have a buffer accessible through iter.get_buffer() and the
+// size of its value_type must be in {1, 2, 4, 8, 16, 32, 64, 128}.
+//
+// the result is the conjunction (mpl::and_) of is_buffer_iterator<Iterator>
+// and a membership test of sizeof(value_type) in the list of sizes below.
+template<class Iterator>
+struct is_valid_fill_buffer_iterator :
+ public mpl::and_<
+ is_buffer_iterator<Iterator>,
+ mpl::contains<
+ mpl::vector<
+ mpl::int_<1>,
+ mpl::int_<2>,
+ mpl::int_<4>,
+ mpl::int_<8>,
+ mpl::int_<16>,
+ mpl::int_<32>,
+ mpl::int_<64>,
+ mpl::int_<128>
+ >,
+ mpl::int_<
+ sizeof(typename std::iterator_traits<Iterator>::value_type)
+ >
+ >
+ >::type { };
+
+// explicit specialization: discard_iterator always reports false, so it is
+// never routed to the clEnqueueFillBuffer() overloads.
+// NOTE(review): presumably required because the primary template applies
+// sizeof() to the iterator's value_type -- confirm against discard_iterator.
+template<>
+struct is_valid_fill_buffer_iterator<discard_iterator> : public boost::false_type {};
+
+// overload chosen when BufferIterator satisfies
+// is_valid_fill_buffer_iterator. fills [first, first + count) with value
+// through the command queue's buffer-fill call, and falls back to
+// fill_with_copy() when the queue's device does not pass the OpenCL 1.2
+// version check.
+template<class BufferIterator, class T>
+inline void
+dispatch_fill(BufferIterator first,
+              size_t count,
+              const T &value,
+              command_queue &queue,
+              typename boost::enable_if<
+                  is_valid_fill_buffer_iterator<BufferIterator>
+              >::type* = 0)
+{
+    typedef typename std::iterator_traits<BufferIterator>::value_type element_type;
+
+    // an empty range requires no work
+    if(count == 0){
+        return;
+    }
+
+    // enqueue_fill_buffer requires an OpenCL 1.2 device; without one the
+    // range is filled by copying from a constant iterator instead
+    if(!queue.check_device_version(1, 2)){
+        fill_with_copy(first, count, value, queue);
+        return;
+    }
+
+    // convert the value once to the element type stored in the buffer
+    element_type fill_value = static_cast<element_type>(value);
+    const size_t element_size = sizeof(element_type);
+    const size_t byte_offset =
+        static_cast<size_t>(first.get_index()) * element_size;
+
+    if(count != 1){
+        queue.enqueue_fill_buffer(first.get_buffer(),
+                                  &fill_value,
+                                  element_size,
+                                  byte_offset,
+                                  count * element_size);
+        return;
+    }
+
+    // a single value is written with clEnqueueWriteBuffer() directly. this
+    // is potentially more efficient and also works around a bug in the
+    // intel opencl driver.
+    queue.enqueue_write_buffer(first.get_buffer(),
+                               byte_offset,
+                               element_size,
+                               &fill_value);
+}
+
+// asynchronous counterpart of dispatch_fill() for iterators satisfying
+// is_valid_fill_buffer_iterator. enqueues the fill on queue and returns a
+// future wrapping the resulting event. an empty range returns a
+// default-constructed future without enqueuing anything. when the queue's
+// device does not pass the OpenCL 1.2 version check the work is delegated
+// to fill_async_with_copy().
+//
+// note: unlike dispatch_fill(), a single-element range is not routed
+// through enqueue_write_buffer() here.
+template<class BufferIterator, class T>
+inline future<void>
+dispatch_fill_async(BufferIterator first,
+                    size_t count,
+                    const T &value,
+                    command_queue &queue,
+                    typename boost::enable_if<
+                        is_valid_fill_buffer_iterator<BufferIterator>
+                    >::type* = 0)
+{
+    typedef typename std::iterator_traits<BufferIterator>::value_type value_type;
+
+    if(count == 0){
+        // nothing to do. mirrors the guard in dispatch_fill() and in the
+        // svm_ptr overloads, and keeps a zero-byte region from being
+        // handed to enqueue_fill_buffer()
+        return future<void>();
+    }
+
+    // check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer)
+    if(!queue.check_device_version(1, 2)){
+        return fill_async_with_copy(first, count, value, queue);
+    }
+
+    // convert the value once to the element type stored in the buffer
+    value_type pattern = static_cast<value_type>(value);
+    size_t offset = static_cast<size_t>(first.get_index());
+
+    // offset and size are passed in bytes
+    event event_ =
+        queue.enqueue_fill_buffer(first.get_buffer(),
+                                  &pattern,
+                                  sizeof(value_type),
+                                  offset * sizeof(value_type),
+                                  count * sizeof(value_type));
+
+    return future<void>(event_);
+}
+
+#ifdef CL_VERSION_2_0
+// overloads for svm_ptr<T>
+// fills count elements of type T starting at the address held by first
+// using the queue's enqueue_svm_fill(); an empty range enqueues nothing
+template<class T>
+inline void dispatch_fill(svm_ptr<T> first,
+                          size_t count,
+                          const T &value,
+                          command_queue &queue)
+{
+    if(count != 0){
+        // pattern size and total size are both given in bytes
+        queue.enqueue_svm_fill(
+            first.get(), &value, sizeof(T), count * sizeof(T)
+        );
+    }
+}
+
+// asynchronous svm_ptr<T> fill: enqueues enqueue_svm_fill() on the queue
+// and returns a future wrapping its event. an empty range yields a
+// default-constructed future and enqueues nothing.
+template<class T>
+inline future<void> dispatch_fill_async(svm_ptr<T> first,
+                                        size_t count,
+                                        const T &value,
+                                        command_queue &queue)
+{
+    if(count != 0){
+        // pattern size and total size are both given in bytes
+        event fill_event = queue.enqueue_svm_fill(
+            first.get(), &value, sizeof(T), count * sizeof(T)
+        );
+
+        return future<void>(fill_event);
+    }
+
+    return future<void>();
+}
+#endif // CL_VERSION_2_0
+
+// default implementations
+// fallback for iterators that do not satisfy
+// is_valid_fill_buffer_iterator: the fill is performed by fill_with_copy()
+template<class BufferIterator, class T>
+inline void
+dispatch_fill(
+    BufferIterator first,
+    size_t count,
+    const T &value,
+    command_queue &queue,
+    typename boost::disable_if<
+        is_valid_fill_buffer_iterator<BufferIterator>
+    >::type* = 0
+)
+{
+    fill_with_copy(first, count, value, queue);
+}
+
+// asynchronous fallback for iterators that do not satisfy
+// is_valid_fill_buffer_iterator: forwards to fill_async_with_copy() and
+// returns its future
+template<class BufferIterator, class T>
+inline future<void>
+dispatch_fill_async(
+    BufferIterator first,
+    size_t count,
+    const T &value,
+    command_queue &queue,
+    typename boost::disable_if<
+        is_valid_fill_buffer_iterator<BufferIterator>
+    >::type* = 0
+)
+{
+    return fill_async_with_copy(first, count, value, queue);
+}
+#else
+// used when CL_VERSION_1_2 is not defined: every iterator type is filled
+// through fill_with_copy()
+template<class BufferIterator, class T>
+inline void dispatch_fill(
+    BufferIterator first,
+    size_t count,
+    const T &value,
+    command_queue &queue
+)
+{
+    fill_with_copy(first, count, value, queue);
+}
+
+// used when CL_VERSION_1_2 is not defined: every iterator type is filled
+// through fill_async_with_copy(), whose future is returned
+template<class BufferIterator, class T>
+inline future<void> dispatch_fill_async(
+    BufferIterator first,
+    size_t count,
+    const T &value,
+    command_queue &queue
+)
+{
+    return fill_async_with_copy(first, count, value, queue);
+}
+#endif // !defined(CL_VERSION_1_2)
+
+} // end detail namespace
+
+/// Assigns \p value to every element of the range [\p first, \p last).
+///
+/// An empty range is a no-op: nothing is dispatched to \p queue.
+///
+/// \param first iterator to the first element to fill
+/// \param last iterator one past the final element to fill
+/// \param value value to copy to each element
+/// \param queue command queue to perform the operation
+///
+/// For example, to fill a vector on the device with sevens:
+/// \code
+/// // vector on the device
+/// boost::compute::vector<int> vec(10, context);
+///
+/// // fill vector with sevens
+/// boost::compute::fill(vec.begin(), vec.end(), 7, queue);
+/// \endcode
+///
+/// \see boost::compute::fill_n()
+template<class BufferIterator, class T>
+inline void fill(BufferIterator first,
+                 BufferIterator last,
+                 const T &value,
+                 command_queue &queue = system::default_queue())
+{
+    const size_t element_count = detail::iterator_range_size(first, last);
+
+    // only dispatch when there is at least one element to write
+    if(element_count != 0){
+        detail::dispatch_fill(first, element_count, value, queue);
+    }
+}
+
+/// Asynchronous variant of fill(): assigns \p value to every element of
+/// the range [\p first, \p last) and returns a future for the operation.
+///
+/// An empty range returns a default-constructed future and dispatches
+/// nothing to \p queue.
+///
+/// \param first iterator to the first element to fill
+/// \param last iterator one past the final element to fill
+/// \param value value to copy to each element
+/// \param queue command queue to perform the operation
+template<class BufferIterator, class T>
+inline future<void> fill_async(BufferIterator first,
+                               BufferIterator last,
+                               const T &value,
+                               command_queue &queue = system::default_queue())
+{
+    const size_t element_count = detail::iterator_range_size(first, last);
+
+    // only dispatch when there is at least one element to write
+    if(element_count != 0){
+        return detail::dispatch_fill_async(first, element_count, value, queue);
+    }
+
+    return future<void>();
+}
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_FILL_HPP