diff options
Diffstat (limited to 'boost/compute/algorithm/fill.hpp')
-rw-r--r-- | boost/compute/algorithm/fill.hpp | 306 |
1 files changed, 306 insertions, 0 deletions
diff --git a/boost/compute/algorithm/fill.hpp b/boost/compute/algorithm/fill.hpp new file mode 100644 index 0000000000..c711f46b94 --- /dev/null +++ b/boost/compute/algorithm/fill.hpp @@ -0,0 +1,306 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_FILL_HPP +#define BOOST_COMPUTE_ALGORITHM_FILL_HPP + +#include <iterator> + +#include <boost/mpl/int.hpp> +#include <boost/mpl/vector.hpp> +#include <boost/mpl/contains.hpp> +#include <boost/utility/enable_if.hpp> + +#include <boost/compute/cl.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/async/future.hpp> +#include <boost/compute/iterator/constant_iterator.hpp> +#include <boost/compute/iterator/discard_iterator.hpp> +#include <boost/compute/detail/is_buffer_iterator.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> + +namespace boost { +namespace compute { +namespace detail { + +namespace mpl = boost::mpl; + +// fills the range [first, first + count) with value using copy() +template<class BufferIterator, class T> +inline void fill_with_copy(BufferIterator first, + size_t count, + const T &value, + command_queue &queue) +{ + ::boost::compute::copy( + ::boost::compute::make_constant_iterator(value, 0), + ::boost::compute::make_constant_iterator(value, count), + first, + queue + ); +} + +// fills the range [first, first + count) with value using copy_async() +template<class BufferIterator, class T> +inline future<void> fill_async_with_copy(BufferIterator first, + size_t count, + const T &value, + command_queue &queue) +{ + return ::boost::compute::copy_async( + ::boost::compute::make_constant_iterator(value, 0), + ::boost::compute::make_constant_iterator(value, count), + first, + queue + ); +} + +#if defined(CL_VERSION_1_2) + +// meta-function returing true if Iterator points to a range of values +// that can be filled using clEnqueueFillBuffer(). to meet this criteria +// it must have a buffer accessible through iter.get_buffer() and the +// size of its value_type must by in {1, 2, 4, 8, 16, 32, 64, 128}. +template<class Iterator> +struct is_valid_fill_buffer_iterator : + public mpl::and_< + is_buffer_iterator<Iterator>, + mpl::contains< + mpl::vector< + mpl::int_<1>, + mpl::int_<2>, + mpl::int_<4>, + mpl::int_<8>, + mpl::int_<16>, + mpl::int_<32>, + mpl::int_<64>, + mpl::int_<128> + >, + mpl::int_< + sizeof(typename std::iterator_traits<Iterator>::value_type) + > + > + >::type { }; + +template<> +struct is_valid_fill_buffer_iterator<discard_iterator> : public boost::false_type {}; + +// specialization which uses clEnqueueFillBuffer for buffer iterators +template<class BufferIterator, class T> +inline void +dispatch_fill(BufferIterator first, + size_t count, + const T &value, + command_queue &queue, + typename boost::enable_if< + is_valid_fill_buffer_iterator<BufferIterator> + >::type* = 0) +{ + typedef typename std::iterator_traits<BufferIterator>::value_type value_type; + + if(count == 0){ + // nothing to do + return; + } + + // check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer) + if(!queue.check_device_version(1, 2)){ + return fill_with_copy(first, count, value, queue); + } + + value_type pattern = static_cast<value_type>(value); + size_t offset = static_cast<size_t>(first.get_index()); + + if(count == 1){ + // use clEnqueueWriteBuffer() directly when writing a single value + // to the device buffer. this is potentially more efficient and also + // works around a bug in the intel opencl driver. + queue.enqueue_write_buffer( + first.get_buffer(), + offset * sizeof(value_type), + sizeof(value_type), + &pattern + ); + } + else { + queue.enqueue_fill_buffer( + first.get_buffer(), + &pattern, + sizeof(value_type), + offset * sizeof(value_type), + count * sizeof(value_type) + ); + } +} + +template<class BufferIterator, class T> +inline future<void> +dispatch_fill_async(BufferIterator first, + size_t count, + const T &value, + command_queue &queue, + typename boost::enable_if< + is_valid_fill_buffer_iterator<BufferIterator> + >::type* = 0) +{ + typedef typename std::iterator_traits<BufferIterator>::value_type value_type; + + // check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer) + if(!queue.check_device_version(1, 2)){ + return fill_async_with_copy(first, count, value, queue); + } + + value_type pattern = static_cast<value_type>(value); + size_t offset = static_cast<size_t>(first.get_index()); + + event event_ = + queue.enqueue_fill_buffer(first.get_buffer(), + &pattern, + sizeof(value_type), + offset * sizeof(value_type), + count * sizeof(value_type)); + + return future<void>(event_); +} + +#ifdef CL_VERSION_2_0 +// specializations for svm_ptr<T> +template<class T> +inline void dispatch_fill(svm_ptr<T> first, + size_t count, + const T &value, + command_queue &queue) +{ + if(count == 0){ + return; + } + + queue.enqueue_svm_fill( + first.get(), &value, sizeof(T), count * sizeof(T) + ); +} + +template<class T> +inline future<void> dispatch_fill_async(svm_ptr<T> first, + size_t count, + const T &value, + command_queue &queue) +{ + if(count == 0){ + return future<void>(); + } + + event event_ = queue.enqueue_svm_fill( + first.get(), &value, sizeof(T), count * sizeof(T) + ); + + return future<void>(event_); +} +#endif // CL_VERSION_2_0 + +// default implementations +template<class BufferIterator, class T> +inline void +dispatch_fill(BufferIterator first, + size_t count, + const T &value, + command_queue &queue, + typename boost::disable_if< + is_valid_fill_buffer_iterator<BufferIterator> + >::type* = 0) +{ + fill_with_copy(first, count, value, queue); +} + +template<class BufferIterator, class T> +inline future<void> +dispatch_fill_async(BufferIterator first, + size_t count, + const T &value, + command_queue &queue, + typename boost::disable_if< + is_valid_fill_buffer_iterator<BufferIterator> + >::type* = 0) +{ + return fill_async_with_copy(first, count, value, queue); +} +#else +template<class BufferIterator, class T> +inline void dispatch_fill(BufferIterator first, + size_t count, + const T &value, + command_queue &queue) +{ + fill_with_copy(first, count, value, queue); +} + +template<class BufferIterator, class T> +inline future<void> dispatch_fill_async(BufferIterator first, + size_t count, + const T &value, + command_queue &queue) +{ + return fill_async_with_copy(first, count, value, queue); +} +#endif // !defined(CL_VERSION_1_2) + +} // end detail namespace + +/// Fills the range [\p first, \p last) with \p value. +/// +/// \param first first element in the range to fill +/// \param last last element in the range to fill +/// \param value value to copy to each element +/// \param queue command queue to perform the operation +/// +/// For example, to fill a vector on the device with sevens: +/// \code +/// // vector on the device +/// boost::compute::vector<int> vec(10, context); +/// +/// // fill vector with sevens +/// boost::compute::fill(vec.begin(), vec.end(), 7, queue); +/// \endcode +/// +/// \see boost::compute::fill_n() +template<class BufferIterator, class T> +inline void fill(BufferIterator first, + BufferIterator last, + const T &value, + command_queue &queue = system::default_queue()) +{ + size_t count = detail::iterator_range_size(first, last); + if(count == 0){ + return; + } + + detail::dispatch_fill(first, count, value, queue); +} + +template<class BufferIterator, class T> +inline future<void> fill_async(BufferIterator first, + BufferIterator last, + const T &value, + command_queue &queue = system::default_queue()) +{ + size_t count = detail::iterator_range_size(first, last); + if(count == 0){ + return future<void>(); + } + + return detail::dispatch_fill_async(first, count, value, queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_FILL_HPP |