diff options
Diffstat (limited to 'boost/compute')
309 files changed, 38849 insertions, 0 deletions
diff --git a/boost/compute/algorithm.hpp b/boost/compute/algorithm.hpp new file mode 100644 index 0000000000..686640e9e9 --- /dev/null +++ b/boost/compute/algorithm.hpp @@ -0,0 +1,94 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_HPP +#define BOOST_COMPUTE_ALGORITHM_HPP + +/// \file +/// +/// Meta-header to include all Boost.Compute algorithm headers. + +#include <boost/compute/algorithm/accumulate.hpp> +#include <boost/compute/algorithm/adjacent_difference.hpp> +#include <boost/compute/algorithm/adjacent_find.hpp> +#include <boost/compute/algorithm/all_of.hpp> +#include <boost/compute/algorithm/any_of.hpp> +#include <boost/compute/algorithm/binary_search.hpp> +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/algorithm/copy_if.hpp> +#include <boost/compute/algorithm/copy_n.hpp> +#include <boost/compute/algorithm/count.hpp> +#include <boost/compute/algorithm/count_if.hpp> +#include <boost/compute/algorithm/equal.hpp> +#include <boost/compute/algorithm/equal_range.hpp> +#include <boost/compute/algorithm/exclusive_scan.hpp> +#include <boost/compute/algorithm/fill.hpp> +#include <boost/compute/algorithm/fill_n.hpp> +#include <boost/compute/algorithm/find.hpp> +#include <boost/compute/algorithm/find_end.hpp> +#include <boost/compute/algorithm/find_if.hpp> +#include <boost/compute/algorithm/find_if_not.hpp> +#include <boost/compute/algorithm/for_each.hpp> +#include <boost/compute/algorithm/for_each_n.hpp> +#include <boost/compute/algorithm/gather.hpp> +#include <boost/compute/algorithm/generate.hpp> +#include 
<boost/compute/algorithm/generate_n.hpp> +#include <boost/compute/algorithm/inclusive_scan.hpp> +#include <boost/compute/algorithm/includes.hpp> +#include <boost/compute/algorithm/inner_product.hpp> +#include <boost/compute/algorithm/iota.hpp> +#include <boost/compute/algorithm/is_partitioned.hpp> +#include <boost/compute/algorithm/is_permutation.hpp> +#include <boost/compute/algorithm/is_sorted.hpp> +#include <boost/compute/algorithm/lower_bound.hpp> +#include <boost/compute/algorithm/lexicographical_compare.hpp> +#include <boost/compute/algorithm/max_element.hpp> +#include <boost/compute/algorithm/merge.hpp> +#include <boost/compute/algorithm/min_element.hpp> +#include <boost/compute/algorithm/minmax_element.hpp> +#include <boost/compute/algorithm/mismatch.hpp> +#include <boost/compute/algorithm/next_permutation.hpp> +#include <boost/compute/algorithm/none_of.hpp> +#include <boost/compute/algorithm/partial_sum.hpp> +#include <boost/compute/algorithm/partition.hpp> +#include <boost/compute/algorithm/partition_copy.hpp> +#include <boost/compute/algorithm/partition_point.hpp> +#include <boost/compute/algorithm/prev_permutation.hpp> +#include <boost/compute/algorithm/random_shuffle.hpp> +#include <boost/compute/algorithm/reduce.hpp> +#include <boost/compute/algorithm/reduce_by_key.hpp> +#include <boost/compute/algorithm/remove.hpp> +#include <boost/compute/algorithm/remove_if.hpp> +#include <boost/compute/algorithm/replace.hpp> +#include <boost/compute/algorithm/replace_copy.hpp> +#include <boost/compute/algorithm/reverse.hpp> +#include <boost/compute/algorithm/reverse_copy.hpp> +#include <boost/compute/algorithm/rotate.hpp> +#include <boost/compute/algorithm/rotate_copy.hpp> +#include <boost/compute/algorithm/scatter.hpp> +#include <boost/compute/algorithm/search.hpp> +#include <boost/compute/algorithm/search_n.hpp> +#include <boost/compute/algorithm/set_difference.hpp> +#include <boost/compute/algorithm/set_intersection.hpp> +#include 
<boost/compute/algorithm/set_symmetric_difference.hpp> +#include <boost/compute/algorithm/set_union.hpp> +#include <boost/compute/algorithm/sort.hpp> +#include <boost/compute/algorithm/sort_by_key.hpp> +#include <boost/compute/algorithm/stable_partition.hpp> +#include <boost/compute/algorithm/stable_sort.hpp> +#include <boost/compute/algorithm/stable_sort_by_key.hpp> +#include <boost/compute/algorithm/swap_ranges.hpp> +#include <boost/compute/algorithm/transform.hpp> +#include <boost/compute/algorithm/transform_reduce.hpp> +#include <boost/compute/algorithm/unique.hpp> +#include <boost/compute/algorithm/unique_copy.hpp> +#include <boost/compute/algorithm/upper_bound.hpp> + +#endif // BOOST_COMPUTE_ALGORITHM_HPP diff --git a/boost/compute/algorithm/accumulate.hpp b/boost/compute/algorithm/accumulate.hpp new file mode 100644 index 0000000000..328420a07c --- /dev/null +++ b/boost/compute/algorithm/accumulate.hpp @@ -0,0 +1,184 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP +#define BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP + +#include <boost/preprocessor/seq/for_each.hpp> + +#include <boost/compute/system.hpp> +#include <boost/compute/functional.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/reduce.hpp> +#include <boost/compute/algorithm/detail/serial_accumulate.hpp> +#include <boost/compute/container/array.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class InputIterator, class T, class BinaryFunction> +inline T generic_accumulate(InputIterator first, + InputIterator last, + T init, + BinaryFunction function, + command_queue &queue) +{ + const context &context = queue.get_context(); + + size_t size = iterator_range_size(first, last); + if(size == 0){ + return init; + } + + // accumulate on device + array<T, 1> device_result(context); + detail::serial_accumulate( + first, last, device_result.begin(), init, function, queue + ); + + // copy result to host + T result; + ::boost::compute::copy_n(device_result.begin(), 1, &result, queue); + return result; +} + +// returns true if we can use reduce() instead of accumulate() when +// accumulate() this is true when the function is commutative (such as +// addition of integers) and the initial value is the identity value +// for the operation (zero for addition, one for multiplication). 
+template<class T, class F> +inline bool can_accumulate_with_reduce(T init, F function) +{ + (void) init; + (void) function; + + return false; +} + +/// \internal_ +#define BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE(r, data, type) \ + inline bool can_accumulate_with_reduce(type init, plus<type>) \ + { \ + return init == type(0); \ + } \ + inline bool can_accumulate_with_reduce(type init, multiplies<type>) \ + { \ + return init == type(1); \ + } + +BOOST_PP_SEQ_FOR_EACH( + BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE, + _, + (char_)(uchar_)(short_)(ushort_)(int_)(uint_)(long_)(ulong_) +) + +template<class T> +inline bool can_accumulate_with_reduce(T init, min<T>) +{ + return init == (std::numeric_limits<T>::max)(); +} + +template<class T> +inline bool can_accumulate_with_reduce(T init, max<T>) +{ + return init == (std::numeric_limits<T>::min)(); +} + +#undef BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE + +template<class InputIterator, class T, class BinaryFunction> +inline T dispatch_accumulate(InputIterator first, + InputIterator last, + T init, + BinaryFunction function, + command_queue &queue) +{ + size_t size = iterator_range_size(first, last); + if(size == 0){ + return init; + } + + if(can_accumulate_with_reduce(init, function)){ + T result; + reduce(first, last, &result, function, queue); + return result; + } + else { + return generic_accumulate(first, last, init, function, queue); + } +} + +} // end detail namespace + +/// Returns the result of applying \p function to the elements in the +/// range [\p first, \p last) and \p init. +/// +/// If no function is specified, \c plus will be used. 
+/// +/// \param first first element in the input range +/// \param last last element in the input range +/// \param init initial value +/// \param function binary reduction function +/// \param queue command queue to perform the operation +/// +/// \return the accumulated result value +/// +/// In specific situations the call to \c accumulate() can be automatically +/// optimized to a call to the more efficient \c reduce() algorithm. This +/// occurs when the binary reduction function is recognized as associative +/// (such as the \c plus<int> function). +/// +/// Note that because floating-point addition is not associative, calling +/// \c accumulate() with \c plus<float> results in a less efficient serial +/// reduction algorithm being executed. If a slight loss in precision is +/// acceptable, the more efficient parallel \c reduce() algorithm should be +/// used instead. +/// +/// For example: +/// \code +/// // with vec = boost::compute::vector<int> +/// accumulate(vec.begin(), vec.end(), 0, plus<int>()); // fast +/// reduce(vec.begin(), vec.end(), &result, plus<int>()); // fast +/// +/// // with vec = boost::compute::vector<float> +/// accumulate(vec.begin(), vec.end(), 0, plus<float>()); // slow +/// reduce(vec.begin(), vec.end(), &result, plus<float>()); // fast +/// \endcode +/// +/// \see reduce() +template<class InputIterator, class T, class BinaryFunction> +inline T accumulate(InputIterator first, + InputIterator last, + T init, + BinaryFunction function, + command_queue &queue = system::default_queue()) +{ + return detail::dispatch_accumulate(first, last, init, function, queue); +} + +/// \overload +template<class InputIterator, class T> +inline T accumulate(InputIterator first, + InputIterator last, + T init, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator>::value_type IT; + + return detail::dispatch_accumulate(first, last, init, plus<IT>(), queue); +} + +} // end compute namespace +} // end 
boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP diff --git a/boost/compute/algorithm/adjacent_difference.hpp b/boost/compute/algorithm/adjacent_difference.hpp new file mode 100644 index 0000000000..a8f84e020e --- /dev/null +++ b/boost/compute/algorithm/adjacent_difference.hpp @@ -0,0 +1,98 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_ADJACENT_DIFFERENCE_HPP +#define BOOST_COMPUTE_ALGORITHM_ADJACENT_DIFFERENCE_HPP + +#include <iterator> + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/functional/operator.hpp> +#include <boost/compute/container/vector.hpp> + +namespace boost { +namespace compute { + +/// Stores the difference of each pair of consecutive values in the range +/// [\p first, \p last) to the range beginning at \p result. If \p op is not +/// provided, \c minus<T> is used. 
+/// +/// \param first first element in the input range +/// \param last last element in the input range +/// \param result first element in the output range +/// \param op binary difference function +/// \param queue command queue to perform the operation +/// +/// \return \c OutputIterator to the end of the result range +/// +/// \see adjacent_find() +template<class InputIterator, class OutputIterator, class BinaryFunction> +inline OutputIterator +adjacent_difference(InputIterator first, + InputIterator last, + OutputIterator result, + BinaryFunction op, + command_queue &queue = system::default_queue()) +{ + if(first == last){ + return result; + } + + size_t count = detail::iterator_range_size(first, last); + + detail::meta_kernel k("adjacent_difference"); + + k << "const uint i = get_global_id(0);\n" + << "if(i == 0){\n" + << " " << result[k.var<uint_>("0")] << " = " << first[k.var<uint_>("0")] << ";\n" + << "}\n" + << "else {\n" + << " " << result[k.var<uint_>("i")] << " = " + << op(first[k.var<uint_>("i")], first[k.var<uint_>("i-1")]) << ";\n" + << "}\n"; + + k.exec_1d(queue, 0, count, 1); + + return result + count; +} + +/// \overload +template<class InputIterator, class OutputIterator> +inline OutputIterator +adjacent_difference(InputIterator first, + InputIterator last, + OutputIterator result, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator>::value_type value_type; + + if (first == result) { + vector<value_type> temp(detail::iterator_range_size(first, last), + queue.get_context()); + copy(first, last, temp.begin(), queue); + + return ::boost::compute::adjacent_difference( + temp.begin(), temp.end(), result, ::boost::compute::minus<value_type>(), queue + ); + } + else { + return ::boost::compute::adjacent_difference( + first, last, result, ::boost::compute::minus<value_type>(), queue + ); + } +} + +} // end compute namespace +} // end boost namespace + +#endif // 
BOOST_COMPUTE_ALGORITHM_ADJACENT_DIFFERENCE_HPP diff --git a/boost/compute/algorithm/adjacent_find.hpp b/boost/compute/algorithm/adjacent_find.hpp new file mode 100644 index 0000000000..992a01eddc --- /dev/null +++ b/boost/compute/algorithm/adjacent_find.hpp @@ -0,0 +1,162 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_ADJACENT_FIND_HPP +#define BOOST_COMPUTE_ALGORITHM_ADJACENT_FIND_HPP + +#include <iterator> + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/lambda.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/container/detail/scalar.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/functional/operator.hpp> +#include <boost/compute/type_traits/vector_size.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class InputIterator, class Compare> +inline InputIterator +serial_adjacent_find(InputIterator first, + InputIterator last, + Compare compare, + command_queue &queue) +{ + if(first == last){ + return last; + } + + const context &context = queue.get_context(); + + detail::scalar<uint_> output(context); + + detail::meta_kernel k("serial_adjacent_find"); + + size_t size_arg = k.add_arg<const uint_>("size"); + size_t output_arg = k.add_arg<uint_ *>(memory_object::global_memory, "output"); + + k << k.decl<uint_>("result") << " = size;\n" + << "for(uint i = 0; i < size - 1; i++){\n" + << " if(" << compare(first[k.expr<uint_>("i")], + first[k.expr<uint_>("i+1")]) << "){\n" + << " 
result = i;\n" + << " break;\n" + << " }\n" + << "}\n" + << "*output = result;\n"; + + k.set_arg<const uint_>( + size_arg, static_cast<uint_>(detail::iterator_range_size(first, last)) + ); + k.set_arg(output_arg, output.get_buffer()); + + k.exec_1d(queue, 0, 1, 1); + + return first + output.read(queue); +} + +template<class InputIterator, class Compare> +inline InputIterator +adjacent_find_with_atomics(InputIterator first, + InputIterator last, + Compare compare, + command_queue &queue) +{ + if(first == last){ + return last; + } + + const context &context = queue.get_context(); + size_t count = detail::iterator_range_size(first, last); + + // initialize output to the last index + detail::scalar<uint_> output(context); + output.write(static_cast<uint_>(count), queue); + + detail::meta_kernel k("adjacent_find_with_atomics"); + + size_t output_arg = k.add_arg<uint_ *>(memory_object::global_memory, "output"); + + k << "const uint i = get_global_id(0);\n" + << "if(" << compare(first[k.expr<uint_>("i")], + first[k.expr<uint_>("i+1")]) << "){\n" + << " atomic_min(output, i);\n" + << "}\n"; + + k.set_arg(output_arg, output.get_buffer()); + + k.exec_1d(queue, 0, count - 1, 1); + + return first + output.read(queue); +} + +} // end detail namespace + +/// Searches the range [\p first, \p last) for two identical adjacent +/// elements and returns an iterator pointing to the first. +/// +/// \param first first element in the range to search +/// \param last last element in the range to search +/// \param compare binary comparison function +/// \param queue command queue to perform the operation +/// +/// \return \c InputIteratorm to the first element which compares equal +/// to the following element. If none are equal, returns \c last. 
+/// +/// \see find(), adjacent_difference() +template<class InputIterator, class Compare> +inline InputIterator +adjacent_find(InputIterator first, + InputIterator last, + Compare compare, + command_queue &queue = system::default_queue()) +{ + size_t count = detail::iterator_range_size(first, last); + if(count < 32){ + return detail::serial_adjacent_find(first, last, compare, queue); + } + else { + return detail::adjacent_find_with_atomics(first, last, compare, queue); + } +} + +/// \overload +template<class InputIterator> +inline InputIterator +adjacent_find(InputIterator first, + InputIterator last, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator>::value_type value_type; + + using ::boost::compute::lambda::_1; + using ::boost::compute::lambda::_2; + using ::boost::compute::lambda::all; + + if(vector_size<value_type>::value == 1){ + return ::boost::compute::adjacent_find( + first, last, _1 == _2, queue + ); + } + else { + return ::boost::compute::adjacent_find( + first, last, all(_1 == _2), queue + ); + } +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_ADJACENT_FIND_HPP diff --git a/boost/compute/algorithm/all_of.hpp b/boost/compute/algorithm/all_of.hpp new file mode 100644 index 0000000000..34d7518f32 --- /dev/null +++ b/boost/compute/algorithm/all_of.hpp @@ -0,0 +1,36 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_ALL_OF_HPP +#define BOOST_COMPUTE_ALGORITHM_ALL_OF_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/algorithm/find_if_not.hpp> + +namespace boost { +namespace compute { + +/// Returns \c true if \p predicate returns \c true for all of the elements in +/// the range [\p first, \p last). +/// +/// \see any_of(), none_of() +template<class InputIterator, class UnaryPredicate> +inline bool all_of(InputIterator first, + InputIterator last, + UnaryPredicate predicate, + command_queue &queue = system::default_queue()) +{ + return ::boost::compute::find_if_not(first, last, predicate, queue) == last; +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_ALL_OF_HPP diff --git a/boost/compute/algorithm/any_of.hpp b/boost/compute/algorithm/any_of.hpp new file mode 100644 index 0000000000..b07779597c --- /dev/null +++ b/boost/compute/algorithm/any_of.hpp @@ -0,0 +1,40 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_ANY_OF_HPP +#define BOOST_COMPUTE_ALGORITHM_ANY_OF_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/algorithm/find_if.hpp> + +namespace boost { +namespace compute { + +/// Returns \c true if \p predicate returns \c true for any of the elements in +/// the range [\p first, \p last). 
+/// +/// For example, to test if a vector contains any negative values: +/// +/// \snippet test/test_any_all_none_of.cpp any_of +/// +/// \see all_of(), none_of() +template<class InputIterator, class UnaryPredicate> +inline bool any_of(InputIterator first, + InputIterator last, + UnaryPredicate predicate, + command_queue &queue = system::default_queue()) +{ + return ::boost::compute::find_if(first, last, predicate, queue) != last; +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_ANY_OF_HPP diff --git a/boost/compute/algorithm/binary_search.hpp b/boost/compute/algorithm/binary_search.hpp new file mode 100644 index 0000000000..6e19498790 --- /dev/null +++ b/boost/compute/algorithm/binary_search.hpp @@ -0,0 +1,37 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_BINARY_SEARCH_HPP +#define BOOST_COMPUTE_ALGORITHM_BINARY_SEARCH_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/lower_bound.hpp> + +namespace boost { +namespace compute { + +/// Returns \c true if \p value is in the sorted range [\p first, +/// \p last). 
+template<class InputIterator, class T> +inline bool binary_search(InputIterator first, + InputIterator last, + const T &value, + command_queue &queue = system::default_queue()) +{ + InputIterator position = lower_bound(first, last, value, queue); + + return position != last && position.read(queue) == value; +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_BINARY_SEARCH_HPP diff --git a/boost/compute/algorithm/copy.hpp b/boost/compute/algorithm/copy.hpp new file mode 100644 index 0000000000..2a25059bba --- /dev/null +++ b/boost/compute/algorithm/copy.hpp @@ -0,0 +1,362 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_COPY_HPP +#define BOOST_COMPUTE_ALGORITHM_COPY_HPP + +#include <algorithm> +#include <iterator> + +#include <boost/utility/enable_if.hpp> + +#include <boost/mpl/and.hpp> +#include <boost/mpl/not.hpp> + +#include <boost/compute/buffer.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/detail/copy_on_device.hpp> +#include <boost/compute/algorithm/detail/copy_to_device.hpp> +#include <boost/compute/algorithm/detail/copy_to_host.hpp> +#include <boost/compute/async/future.hpp> +#include <boost/compute/detail/is_contiguous_iterator.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/iterator/buffer_iterator.hpp> +#include <boost/compute/type_traits/is_device_iterator.hpp> + +namespace boost { +namespace compute { +namespace detail { + +namespace mpl = boost::mpl; + +// 
meta-function returning true if copy() between InputIterator and +// OutputIterator can be implemented with clEnqueueCopyBuffer(). +template<class InputIterator, class OutputIterator> +struct can_copy_with_copy_buffer : + mpl::and_< + boost::is_same< + InputIterator, + buffer_iterator<typename InputIterator::value_type> + >, + boost::is_same< + OutputIterator, + buffer_iterator<typename OutputIterator::value_type> + >, + boost::is_same< + typename InputIterator::value_type, + typename OutputIterator::value_type + > + >::type {}; + +// host -> device +template<class InputIterator, class OutputIterator> +inline OutputIterator +dispatch_copy(InputIterator first, + InputIterator last, + OutputIterator result, + command_queue &queue, + typename boost::enable_if_c< + !is_device_iterator<InputIterator>::value && + is_device_iterator<OutputIterator>::value + >::type* = 0) +{ + if(is_contiguous_iterator<InputIterator>::value){ + return copy_to_device(first, last, result, queue); + } + else { + // for non-contiguous input we first copy the values to + // a temporary std::vector and then copy from there + typedef typename std::iterator_traits<InputIterator>::value_type T; + std::vector<T> vector(first, last); + return copy_to_device(vector.begin(), vector.end(), result, queue); + } +} + +// host -> device (async) +template<class InputIterator, class OutputIterator> +inline future<OutputIterator> +dispatch_copy_async(InputIterator first, + InputIterator last, + OutputIterator result, + command_queue &queue, + typename boost::enable_if_c< + !is_device_iterator<InputIterator>::value && + is_device_iterator<OutputIterator>::value + >::type* = 0) +{ + BOOST_STATIC_ASSERT_MSG( + is_contiguous_iterator<InputIterator>::value, + "copy_async() is only supported for contiguous host iterators" + ); + + return copy_to_device_async(first, last, result, queue); +} + +// device -> host +template<class InputIterator, class OutputIterator> +inline OutputIterator +dispatch_copy(InputIterator 
first, + InputIterator last, + OutputIterator result, + command_queue &queue, + typename boost::enable_if_c< + is_device_iterator<InputIterator>::value && + !is_device_iterator<OutputIterator>::value + >::type* = 0) +{ + if(is_contiguous_iterator<OutputIterator>::value){ + return copy_to_host(first, last, result, queue); + } + else { + // for non-contiguous input we first copy the values to + // a temporary std::vector and then copy from there + typedef typename std::iterator_traits<InputIterator>::value_type T; + std::vector<T> vector(iterator_range_size(first, last)); + copy_to_host(first, last, vector.begin(), queue); + return std::copy(vector.begin(), vector.end(), result); + } +} + +// device -> host (async) +template<class InputIterator, class OutputIterator> +inline future<OutputIterator> +dispatch_copy_async(InputIterator first, + InputIterator last, + OutputIterator result, + command_queue &queue, + typename boost::enable_if_c< + is_device_iterator<InputIterator>::value && + !is_device_iterator<OutputIterator>::value + >::type* = 0) +{ + BOOST_STATIC_ASSERT_MSG( + is_contiguous_iterator<OutputIterator>::value, + "copy_async() is only supported for contiguous host iterators" + ); + + return copy_to_host_async(first, last, result, queue); +} + +// device -> device +template<class InputIterator, class OutputIterator> +inline OutputIterator +dispatch_copy(InputIterator first, + InputIterator last, + OutputIterator result, + command_queue &queue, + typename boost::enable_if< + mpl::and_< + is_device_iterator<InputIterator>, + is_device_iterator<OutputIterator>, + mpl::not_< + can_copy_with_copy_buffer< + InputIterator, OutputIterator + > + > + > + >::type* = 0) +{ + return copy_on_device(first, last, result, queue); +} + +// device -> device (specialization for buffer iterators) +template<class InputIterator, class OutputIterator> +inline OutputIterator +dispatch_copy(InputIterator first, + InputIterator last, + OutputIterator result, + command_queue &queue, + 
typename boost::enable_if< + mpl::and_< + is_device_iterator<InputIterator>, + is_device_iterator<OutputIterator>, + can_copy_with_copy_buffer< + InputIterator, OutputIterator + > + > + >::type* = 0) +{ + typedef typename std::iterator_traits<InputIterator>::value_type value_type; + typedef typename std::iterator_traits<InputIterator>::difference_type difference_type; + + difference_type n = std::distance(first, last); + if(n < 1){ + // nothing to copy + return result; + } + + queue.enqueue_copy_buffer(first.get_buffer(), + result.get_buffer(), + first.get_index() * sizeof(value_type), + result.get_index() * sizeof(value_type), + static_cast<size_t>(n) * sizeof(value_type)); + return result + n; +} + +// device -> device (async) +template<class InputIterator, class OutputIterator> +inline future<OutputIterator> +dispatch_copy_async(InputIterator first, + InputIterator last, + OutputIterator result, + command_queue &queue, + typename boost::enable_if< + mpl::and_< + is_device_iterator<InputIterator>, + is_device_iterator<OutputIterator>, + mpl::not_< + can_copy_with_copy_buffer< + InputIterator, OutputIterator + > + > + > + >::type* = 0) +{ + return copy_on_device_async(first, last, result, queue); +} + +// device -> device (async, specialization for buffer iterators) +template<class InputIterator, class OutputIterator> +inline future<OutputIterator> +dispatch_copy_async(InputIterator first, + InputIterator last, + OutputIterator result, + command_queue &queue, + typename boost::enable_if< + mpl::and_< + is_device_iterator<InputIterator>, + is_device_iterator<OutputIterator>, + can_copy_with_copy_buffer< + InputIterator, OutputIterator + > + > + >::type* = 0) +{ + typedef typename std::iterator_traits<InputIterator>::value_type value_type; + typedef typename std::iterator_traits<InputIterator>::difference_type difference_type; + + difference_type n = std::distance(first, last); + if(n < 1){ + // nothing to copy + return make_future(result, event()); + } + + event 
event_ = + queue.enqueue_copy_buffer( + first.get_buffer(), + result.get_buffer(), + first.get_index() * sizeof(value_type), + result.get_index() * sizeof(value_type), + static_cast<size_t>(n) * sizeof(value_type) + ); + + return make_future(result + n, event_); +} + +// host -> host +template<class InputIterator, class OutputIterator> +inline OutputIterator +dispatch_copy(InputIterator first, + InputIterator last, + OutputIterator result, + command_queue &queue, + typename boost::enable_if_c< + !is_device_iterator<InputIterator>::value && + !is_device_iterator<OutputIterator>::value + >::type* = 0) +{ + (void) queue; + + return std::copy(first, last, result); +} + +} // end detail namespace + +/// Copies the values in the range [\p first, \p last) to the range +/// beginning at \p result. +/// +/// The generic copy() function can be used for a variety of data +/// transfer tasks and provides a standard interface to the following +/// OpenCL functions: +/// +/// \li \c clEnqueueReadBuffer() +/// \li \c clEnqueueWriteBuffer() +/// \li \c clEnqueueCopyBuffer() +/// +/// Unlike the aforementioned OpenCL functions, copy() will also work +/// with non-contiguous data-structures (e.g. \c std::list<T>) as +/// well as with "fancy" iterators (e.g. transform_iterator). 
+/// +/// \param first first element in the range to copy +/// \param last last element in the range to copy +/// \param result first element in the result range +/// \param queue command queue to perform the operation +/// +/// \return \c OutputIterator to the end of the result range +/// +/// For example, to copy an array of \c int values on the host to a vector on +/// the device: +/// \code +/// // array on the host +/// int data[] = { 1, 2, 3, 4 }; +/// +/// // vector on the device +/// boost::compute::vector<int> vec(4, context); +/// +/// // copy values to the device vector +/// boost::compute::copy(data, data + 4, vec.begin(), queue); +/// \endcode +/// +/// The copy algorithm can also be used with standard containers such as +/// \c std::vector<T>: +/// \code +/// std::vector<int> host_vector = ... +/// boost::compute::vector<int> device_vector = ... +/// +/// // copy from the host to the device +/// boost::compute::copy( +/// host_vector.begin(), host_vector.end(), device_vector.begin(), queue +/// ); +/// +/// // copy from the device to the host +/// boost::compute::copy( +/// device_vector.begin(), device_vector.end(), host_vector.begin(), queue +/// ); +/// \endcode +/// +/// \see copy_n(), copy_if(), copy_async() +template<class InputIterator, class OutputIterator> +inline OutputIterator copy(InputIterator first, + InputIterator last, + OutputIterator result, + command_queue &queue = system::default_queue()) +{ + return detail::dispatch_copy(first, last, result, queue); +} + +/// Copies the values in the range [\p first, \p last) to the range +/// beginning at \p result. The copy is performed asynchronously. 
+/// +/// \see copy() +template<class InputIterator, class OutputIterator> +inline future<OutputIterator> +copy_async(InputIterator first, + InputIterator last, + OutputIterator result, + command_queue &queue = system::default_queue()) +{ + return detail::dispatch_copy_async(first, last, result, queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_COPY_HPP diff --git a/boost/compute/algorithm/copy_if.hpp b/boost/compute/algorithm/copy_if.hpp new file mode 100644 index 0000000000..3cd08ef293 --- /dev/null +++ b/boost/compute/algorithm/copy_if.hpp @@ -0,0 +1,58 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_COPY_IF_HPP +#define BOOST_COMPUTE_ALGORITHM_COPY_IF_HPP + +#include <boost/compute/algorithm/transform_if.hpp> +#include <boost/compute/functional/identity.hpp> + +namespace boost { +namespace compute { +namespace detail { + +// like the copy_if() algorithm but writes the indices of the values for which +// predicate returns true. 
+template<class InputIterator, class OutputIterator, class Predicate> +inline OutputIterator copy_index_if(InputIterator first, + InputIterator last, + OutputIterator result, + Predicate predicate, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator>::value_type T; + + return detail::transform_if_impl( + first, last, result, identity<T>(), predicate, true, queue + ); +} + +} // end detail namespace + +/// Copies each element in the range [\p first, \p last) for which +/// \p predicate returns \c true to the range beginning at \p result. +template<class InputIterator, class OutputIterator, class Predicate> +inline OutputIterator copy_if(InputIterator first, + InputIterator last, + OutputIterator result, + Predicate predicate, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator>::value_type T; + + return ::boost::compute::transform_if( + first, last, result, identity<T>(), predicate, queue + ); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_COPY_IF_HPP diff --git a/boost/compute/algorithm/copy_n.hpp b/boost/compute/algorithm/copy_n.hpp new file mode 100644 index 0000000000..f0989edc67 --- /dev/null +++ b/boost/compute/algorithm/copy_n.hpp @@ -0,0 +1,51 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_COPY_N_HPP +#define BOOST_COMPUTE_ALGORITHM_COPY_N_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/copy.hpp> + +namespace boost { +namespace compute { + +/// Copies \p count elements from \p first to \p result. +/// +/// For example, to copy four values from the host to the device: +/// \code +/// // values on the host and vector on the device +/// float values[4] = { 1.f, 2.f, 3.f, 4.f }; +/// boost::compute::vector<float> vec(4, context); +/// +/// // copy from the host to the device +/// boost::compute::copy_n(values, 4, vec.begin(), queue); +/// \endcode +/// +/// \see copy() +template<class InputIterator, class Size, class OutputIterator> +inline OutputIterator copy_n(InputIterator first, + Size count, + OutputIterator result, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator>::difference_type difference_type; + + return ::boost::compute::copy(first, + first + static_cast<difference_type>(count), + result, + queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_COPY_N_HPP diff --git a/boost/compute/algorithm/count.hpp b/boost/compute/algorithm/count.hpp new file mode 100644 index 0000000000..140d67379f --- /dev/null +++ b/boost/compute/algorithm/count.hpp @@ -0,0 +1,55 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_COUNT_HPP +#define BOOST_COMPUTE_ALGORITHM_COUNT_HPP + +#include <boost/compute/lambda.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/count_if.hpp> +#include <boost/compute/type_traits/vector_size.hpp> + +namespace boost { +namespace compute { + +/// Returns the number of occurrences of \p value in the range +/// [\p first, \p last). +/// +/// \see count_if() +template<class InputIterator, class T> +inline size_t count(InputIterator first, + InputIterator last, + const T &value, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator>::value_type value_type; + + using ::boost::compute::_1; + using ::boost::compute::lambda::all; + + if(vector_size<value_type>::value == 1){ + return ::boost::compute::count_if(first, + last, + _1 == value, + queue); + } + else { + return ::boost::compute::count_if(first, + last, + all(_1 == value), + queue); + } +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_COUNT_HPP diff --git a/boost/compute/algorithm/count_if.hpp b/boost/compute/algorithm/count_if.hpp new file mode 100644 index 0000000000..c9381ce5d4 --- /dev/null +++ b/boost/compute/algorithm/count_if.hpp @@ -0,0 +1,62 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_COUNT_IF_HPP +#define BOOST_COMPUTE_ALGORITHM_COUNT_IF_HPP + +#include <boost/compute/device.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/detail/count_if_with_ballot.hpp> +#include <boost/compute/algorithm/detail/count_if_with_reduce.hpp> +#include <boost/compute/algorithm/detail/count_if_with_threads.hpp> +#include <boost/compute/algorithm/detail/serial_count_if.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> + +namespace boost { +namespace compute { + +/// Returns the number of elements in the range [\p first, \p last) +/// for which \p predicate returns \c true. +template<class InputIterator, class Predicate> +inline size_t count_if(InputIterator first, + InputIterator last, + Predicate predicate, + command_queue &queue = system::default_queue()) +{ + const device &device = queue.get_device(); + + size_t input_size = detail::iterator_range_size(first, last); + if(input_size == 0){ + return 0; + } + + if(device.type() & device::cpu){ + if(input_size < 1024){ + return detail::serial_count_if(first, last, predicate, queue); + } + else { + return detail::count_if_with_threads(first, last, predicate, queue); + } + } + else { + if(input_size < 32){ + return detail::serial_count_if(first, last, predicate, queue); + } + else { + return detail::count_if_with_reduce(first, last, predicate, queue); + } + } +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_COUNT_IF_HPP diff --git a/boost/compute/algorithm/detail/balanced_path.hpp b/boost/compute/algorithm/detail/balanced_path.hpp new file mode 100644 index 0000000000..e5025532d3 --- /dev/null +++ b/boost/compute/algorithm/detail/balanced_path.hpp @@ -0,0 +1,162 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan 
<thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_BALANCED_PATH_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_BALANCED_PATH_HPP + +#include <iterator> + +#include <boost/compute/algorithm/find_if.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/lambda.hpp> +#include <boost/compute/system.hpp> + +namespace boost { +namespace compute { +namespace detail { + +/// +/// \brief Balanced Path kernel class +/// +/// Subclass of meta_kernel to break two sets into tiles according +/// to their balanced path. +/// +class balanced_path_kernel : public meta_kernel +{ +public: + unsigned int tile_size; + + balanced_path_kernel() : meta_kernel("balanced_path") + { + tile_size = 4; + } + + template<class InputIterator1, class InputIterator2, + class OutputIterator1, class OutputIterator2, + class Compare> + void set_range(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator1 result_a, + OutputIterator2 result_b, + Compare comp) + { + typedef typename std::iterator_traits<InputIterator1>::value_type value_type; + + m_a_count = iterator_range_size(first1, last1); + m_a_count_arg = add_arg<uint_>("a_count"); + + m_b_count = iterator_range_size(first2, last2); + m_b_count_arg = add_arg<uint_>("b_count"); + + *this << + "uint i = get_global_id(0);\n" << + "uint target = (i+1)*" << tile_size << ";\n" << + "uint start = max(convert_int(0),convert_int(target)-convert_int(b_count));\n" << + "uint end = min(target,a_count);\n" << + "uint a_index, b_index;\n" << + 
"while(start<end)\n" << + "{\n" << + " a_index = (start + end)/2;\n" << + " b_index = target - a_index - 1;\n" << + " if(!(" << comp(first2[expr<uint_>("b_index")], + first1[expr<uint_>("a_index")]) << "))\n" << + " start = a_index + 1;\n" << + " else end = a_index;\n" << + "}\n" << + "a_index = start;\n" << + "b_index = target - start;\n" << + "if(b_index < b_count)\n" << + "{\n" << + " " << decl<const value_type>("x") << " = " << + first2[expr<uint_>("b_index")] << ";\n" << + " uint a_start = 0, a_end = a_index, a_mid;\n" << + " uint b_start = 0, b_end = b_index, b_mid;\n" << + " while(a_start<a_end)\n" << + " {\n" << + " a_mid = (a_start + a_end)/2;\n" << + " if(" << comp(first1[expr<uint_>("a_mid")], expr<value_type>("x")) << ")\n" << + " a_start = a_mid+1;\n" << + " else a_end = a_mid;\n" << + " }\n" << + " while(b_start<b_end)\n" << + " {\n" << + " b_mid = (b_start + b_end)/2;\n" << + " if(" << comp(first2[expr<uint_>("b_mid")], expr<value_type>("x")) << ")\n" << + " b_start = b_mid+1;\n" << + " else b_end = b_mid;\n" << + " }\n" << + " uint a_run = a_index - a_start;\n" << + " uint b_run = b_index - b_start;\n" << + " uint x_count = a_run + b_run;\n" << + " uint b_advance = max(x_count / 2, x_count - a_run);\n" << + " b_end = min(b_count, b_start + b_advance + 1);\n" << + " uint temp_start = b_index, temp_end = b_end, temp_mid;" << + " while(temp_start < temp_end)\n" << + " {\n" << + " temp_mid = (temp_start + temp_end + 1)/2;\n" << + " if(" << comp(expr<value_type>("x"), first2[expr<uint_>("temp_mid")]) << ")\n" << + " temp_end = temp_mid-1;\n" << + " else temp_start = temp_mid;\n" << + " }\n" << + " b_run = temp_start - b_start + 1;\n" << + " b_advance = min(b_advance, b_run);\n" << + " uint a_advance = x_count - b_advance;\n" << + " uint star = convert_uint((a_advance == b_advance + 1) " << + "&& (b_advance < b_run));\n" << + " a_index = a_start + a_advance;\n" << + " b_index = target - a_index + star;\n" << + "}\n" << + result_a[expr<uint_>("i")] << " = 
a_index;\n" << + result_b[expr<uint_>("i")] << " = b_index;\n"; + + } + + template<class InputIterator1, class InputIterator2, + class OutputIterator1, class OutputIterator2> + void set_range(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator1 result_a, + OutputIterator2 result_b) + { + typedef typename std::iterator_traits<InputIterator1>::value_type value_type; + ::boost::compute::less<value_type> less_than; + set_range(first1, last1, first2, last2, result_a, result_b, less_than); + } + + event exec(command_queue &queue) + { + if((m_a_count + m_b_count)/tile_size == 0) { + return event(); + } + + set_arg(m_a_count_arg, uint_(m_a_count)); + set_arg(m_b_count_arg, uint_(m_b_count)); + + return exec_1d(queue, 0, (m_a_count + m_b_count)/tile_size); + } + +private: + size_t m_a_count; + size_t m_a_count_arg; + size_t m_b_count; + size_t m_b_count_arg; +}; + +} //end detail namespace +} //end compute namespace +} //end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_BALANCED_PATH_HPP diff --git a/boost/compute/algorithm/detail/binary_find.hpp b/boost/compute/algorithm/detail/binary_find.hpp new file mode 100644 index 0000000000..27fa11fbaf --- /dev/null +++ b/boost/compute/algorithm/detail/binary_find.hpp @@ -0,0 +1,133 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_BINARY_FIND_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_BINARY_FIND_HPP + +#include <boost/compute/functional.hpp> +#include <boost/compute/algorithm/find_if.hpp> +#include <boost/compute/algorithm/transform.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/detail/parameter_cache.hpp> + +namespace boost { +namespace compute { +namespace detail{ + +/// +/// \brief Binary find kernel class +/// +/// Subclass of meta_kernel to perform single step in binary find. +/// +template<class InputIterator, class UnaryPredicate> +class binary_find_kernel : public meta_kernel +{ +public: + binary_find_kernel(InputIterator first, + InputIterator last, + UnaryPredicate predicate) + : meta_kernel("binary_find") + { + typedef typename std::iterator_traits<InputIterator>::value_type value_type; + + m_index_arg = add_arg<uint_ *>(memory_object::global_memory, "index"); + m_block_arg = add_arg<uint_>("block"); + + atomic_min<uint_> atomic_min_uint; + + *this << + "uint i = get_global_id(0) * block;\n" << + decl<value_type>("value") << "=" << first[var<uint_>("i")] << ";\n" << + "if(" << predicate(var<value_type>("value")) << ") {\n" << + atomic_min_uint(var<uint_ *>("index"), var<uint_>("i")) << ";\n" << + "}\n"; + } + + size_t m_index_arg; + size_t m_block_arg; +}; + +/// +/// \brief Binary find algorithm +/// +/// Finds the end of true values in the partitioned range [first, last). 
+/// \return Iterator pointing to end of true values +/// +/// \param first Iterator pointing to start of range +/// \param last Iterator pointing to end of range +/// \param predicate Predicate according to which the range is partitioned +/// \param queue Queue on which to execute +/// +template<class InputIterator, class UnaryPredicate> +inline InputIterator binary_find(InputIterator first, + InputIterator last, + UnaryPredicate predicate, + command_queue &queue = system::default_queue()) +{ + const device &device = queue.get_device(); + + boost::shared_ptr<parameter_cache> parameters = + detail::parameter_cache::get_global_cache(device); + + const std::string cache_key = "__boost_binary_find"; + + size_t find_if_limit = 128; + size_t threads = parameters->get(cache_key, "tpb", 128); + size_t count = iterator_range_size(first, last); + + InputIterator search_first = first; + InputIterator search_last = last; + + scalar<uint_> index(queue.get_context()); + + // construct and compile binary_find kernel + binary_find_kernel<InputIterator, UnaryPredicate> + binary_find_kernel(search_first, search_last, predicate); + ::boost::compute::kernel kernel = binary_find_kernel.compile(queue.get_context()); + + // set buffer for index + kernel.set_arg(binary_find_kernel.m_index_arg, index.get_buffer()); + + while(count > find_if_limit) { + index.write(static_cast<uint_>(count), queue); + + // set block and run binary_find kernel + uint_ block = static_cast<uint_>((count - 1)/(threads - 1)); + kernel.set_arg(binary_find_kernel.m_block_arg, block); + queue.enqueue_1d_range_kernel(kernel, 0, threads, 0); + + size_t i = index.read(queue); + + if(i == count) { + search_first = search_last - ((count - 1)%(threads - 1)); + break; + } else { + search_last = search_first + i; + search_first = search_last - ((count - 1)/(threads - 1)); + } + + // Make sure that first and last stay within the input range + search_last = (std::min)(search_last, last); + search_last = 
(std::max)(search_last, first); + + search_first = (std::max)(search_first, first); + search_first = (std::min)(search_first, last); + + count = iterator_range_size(search_first, search_last); + } + + return find_if(search_first, search_last, predicate, queue); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_BINARY_FIND_HPP diff --git a/boost/compute/algorithm/detail/compact.hpp b/boost/compute/algorithm/detail/compact.hpp new file mode 100644 index 0000000000..983352d543 --- /dev/null +++ b/boost/compute/algorithm/detail/compact.hpp @@ -0,0 +1,77 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COMPACT_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_COMPACT_HPP + +#include <iterator> + +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/system.hpp> + +namespace boost { +namespace compute { +namespace detail { + +/// +/// \brief Compact kernel class +/// +/// Subclass of meta_kernel to compact the result of set kernels to +/// get actual sets +/// +class compact_kernel : public meta_kernel +{ +public: + unsigned int tile_size; + + compact_kernel() : meta_kernel("compact") + { + tile_size = 4; + } + + template<class InputIterator1, class InputIterator2, class OutputIterator> + void set_range(InputIterator1 start, + InputIterator2 counts_begin, + InputIterator2 counts_end, + OutputIterator result) + { + m_count = iterator_range_size(counts_begin, counts_end) - 1; + + *this << + "uint i = get_global_id(0);\n" << + "uint count = i*" << tile_size << ";\n" << + "for(uint j = " << counts_begin[expr<uint_>("i")] << "; j<" << + counts_begin[expr<uint_>("i+1")] << "; j++, count++)\n" << + "{\n" << + result[expr<uint_>("j")] << " = " << start[expr<uint_>("count")] + << ";\n" << + "}\n"; + } + + event exec(command_queue &queue) + { + if(m_count == 0) { + return event(); + } + + return exec_1d(queue, 0, m_count); + } + +private: + size_t m_count; +}; + +} //end detail namespace +} //end compute namespace +} //end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COMPACT_HPP diff --git a/boost/compute/algorithm/detail/copy_on_device.hpp b/boost/compute/algorithm/detail/copy_on_device.hpp new file mode 100644 index 0000000000..0bcee27ed5 --- /dev/null +++ b/boost/compute/algorithm/detail/copy_on_device.hpp @@ -0,0 +1,190 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 
2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_ON_DEVICE_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_ON_DEVICE_HPP + +#include <iterator> + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/async/future.hpp> +#include <boost/compute/iterator/buffer_iterator.hpp> +#include <boost/compute/iterator/discard_iterator.hpp> +#include <boost/compute/memory/svm_ptr.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/detail/parameter_cache.hpp> +#include <boost/compute/detail/work_size.hpp> + +namespace boost { +namespace compute { +namespace detail { + +inline size_t pick_copy_work_group_size(size_t n, const device &device) +{ + (void) device; + + if(n % 32 == 0) return 32; + else if(n % 16 == 0) return 16; + else if(n % 8 == 0) return 8; + else if(n % 4 == 0) return 4; + else if(n % 2 == 0) return 2; + else return 1; +} + +template<class InputIterator, class OutputIterator> +class copy_kernel : public meta_kernel +{ +public: + copy_kernel(const device &device) + : meta_kernel("copy") + { + m_count = 0; + + typedef typename std::iterator_traits<InputIterator>::value_type input_type; + + boost::shared_ptr<parameter_cache> parameters = + detail::parameter_cache::get_global_cache(device); + + std::string cache_key = + "__boost_copy_kernel_" + boost::lexical_cast<std::string>(sizeof(input_type)); + + m_vpt = parameters->get(cache_key, "vpt", 4); + m_tpb = parameters->get(cache_key, "tpb", 128); + } + + void set_range(InputIterator first, + InputIterator last, + OutputIterator result) + { + m_count_arg = add_arg<uint_>("count"); + 
+ *this << + "uint index = get_local_id(0) + " << + "(" << m_vpt * m_tpb << " * get_group_id(0));\n" << + "for(uint i = 0; i < " << m_vpt << "; i++){\n" << + " if(index < count){\n" << + result[expr<uint_>("index")] << '=' << + first[expr<uint_>("index")] << ";\n" << + " index += " << m_tpb << ";\n" + " }\n" + "}\n"; + + m_count = detail::iterator_range_size(first, last); + } + + event exec(command_queue &queue) + { + if(m_count == 0){ + // nothing to do + return event(); + } + + size_t global_work_size = calculate_work_size(m_count, m_vpt, m_tpb); + + set_arg(m_count_arg, uint_(m_count)); + + return exec_1d(queue, 0, global_work_size, m_tpb); + } + +private: + size_t m_count; + size_t m_count_arg; + uint_ m_vpt; + uint_ m_tpb; +}; + +template<class InputIterator, class OutputIterator> +inline OutputIterator copy_on_device(InputIterator first, + InputIterator last, + OutputIterator result, + command_queue &queue) +{ + const device &device = queue.get_device(); + + copy_kernel<InputIterator, OutputIterator> kernel(device); + + kernel.set_range(first, last, result); + kernel.exec(queue); + + return result + std::distance(first, last); +} + +template<class InputIterator> +inline discard_iterator copy_on_device(InputIterator first, + InputIterator last, + discard_iterator result, + command_queue &queue) +{ + (void) queue; + + return result + std::distance(first, last); +} + +template<class InputIterator, class OutputIterator> +inline future<OutputIterator> copy_on_device_async(InputIterator first, + InputIterator last, + OutputIterator result, + command_queue &queue) +{ + const device &device = queue.get_device(); + + copy_kernel<InputIterator, OutputIterator> kernel(device); + + kernel.set_range(first, last, result); + event event_ = kernel.exec(queue); + + return make_future(result + std::distance(first, last), event_); +} + +#ifdef CL_VERSION_2_0 +// copy_on_device() specialization for svm_ptr +template<class T> +inline svm_ptr<T> copy_on_device(svm_ptr<T> first, + 
svm_ptr<T> last, + svm_ptr<T> result, + command_queue &queue) +{ + size_t count = iterator_range_size(first, last); + if(count == 0){ + return result; + } + + queue.enqueue_svm_memcpy( + result.get(), first.get(), count * sizeof(T) + ); + + return result + count; +} + +template<class T> +inline future<svm_ptr<T> > copy_on_device_async(svm_ptr<T> first, + svm_ptr<T> last, + svm_ptr<T> result, + command_queue &queue) +{ + size_t count = iterator_range_size(first, last); + if(count == 0){ + return result; + } + + event event_ = queue.enqueue_svm_memcpy_async( + result.get(), first.get(), count * sizeof(T) + ); + + return make_future(result + count, event_); +} +#endif // CL_VERSION_2_0 + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_ON_DEVICE_HPP diff --git a/boost/compute/algorithm/detail/copy_to_device.hpp b/boost/compute/algorithm/detail/copy_to_device.hpp new file mode 100644 index 0000000000..90545fb4ed --- /dev/null +++ b/boost/compute/algorithm/detail/copy_to_device.hpp @@ -0,0 +1,127 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_TO_DEVICE_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_TO_DEVICE_HPP + +#include <iterator> + +#include <boost/utility/addressof.hpp> + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/async/future.hpp> +#include <boost/compute/iterator/buffer_iterator.hpp> +#include <boost/compute/memory/svm_ptr.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class HostIterator, class DeviceIterator> +inline DeviceIterator copy_to_device(HostIterator first, + HostIterator last, + DeviceIterator result, + command_queue &queue) +{ + typedef typename + std::iterator_traits<DeviceIterator>::value_type + value_type; + typedef typename + std::iterator_traits<DeviceIterator>::difference_type + difference_type; + + size_t count = iterator_range_size(first, last); + if(count == 0){ + return result; + } + + size_t offset = result.get_index(); + + queue.enqueue_write_buffer(result.get_buffer(), + offset * sizeof(value_type), + count * sizeof(value_type), + ::boost::addressof(*first)); + + return result + static_cast<difference_type>(count); +} + +template<class HostIterator, class DeviceIterator> +inline future<DeviceIterator> copy_to_device_async(HostIterator first, + HostIterator last, + DeviceIterator result, + command_queue &queue) +{ + typedef typename + std::iterator_traits<DeviceIterator>::value_type + value_type; + typedef typename + std::iterator_traits<DeviceIterator>::difference_type + difference_type; + + size_t count = iterator_range_size(first, last); + if(count == 0){ + return future<DeviceIterator>(); + } + + size_t offset = result.get_index(); + + event event_ = + queue.enqueue_write_buffer_async(result.get_buffer(), + offset * sizeof(value_type), + count * sizeof(value_type), + ::boost::addressof(*first)); + + return make_future(result + static_cast<difference_type>(count), event_); +} + 
+#ifdef CL_VERSION_2_0 +// copy_to_device() specialization for svm_ptr +template<class HostIterator, class T> +inline svm_ptr<T> copy_to_device(HostIterator first, + HostIterator last, + svm_ptr<T> result, + command_queue &queue) +{ + size_t count = iterator_range_size(first, last); + if(count == 0){ + return result; + } + + queue.enqueue_svm_memcpy( + result.get(), ::boost::addressof(*first), count * sizeof(T) + ); + + return result + count; +} + +template<class HostIterator, class T> +inline future<svm_ptr<T> > copy_to_device_async(HostIterator first, + HostIterator last, + svm_ptr<T> result, + command_queue &queue) +{ + size_t count = iterator_range_size(first, last); + if(count == 0){ + return result; + } + + event event_ = queue.enqueue_svm_memcpy_async( + result.get(), ::boost::addressof(*first), count * sizeof(T) + ); + + return make_future(result + count, event_); +} +#endif // CL_VERSION_2_0 + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_TO_DEVICE_HPP diff --git a/boost/compute/algorithm/detail/copy_to_host.hpp b/boost/compute/algorithm/detail/copy_to_host.hpp new file mode 100644 index 0000000000..b889e0c871 --- /dev/null +++ b/boost/compute/algorithm/detail/copy_to_host.hpp @@ -0,0 +1,137 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_TO_HOST_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_TO_HOST_HPP + +#include <iterator> + +#include <boost/utility/addressof.hpp> + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/async/future.hpp> +#include <boost/compute/iterator/buffer_iterator.hpp> +#include <boost/compute/memory/svm_ptr.hpp> +#include <boost/compute/detail/iterator_plus_distance.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class DeviceIterator, class HostIterator> +inline HostIterator copy_to_host(DeviceIterator first, + DeviceIterator last, + HostIterator result, + command_queue &queue) +{ + typedef typename + std::iterator_traits<DeviceIterator>::value_type + value_type; + + size_t count = iterator_range_size(first, last); + if(count == 0){ + return result; + } + + const buffer &buffer = first.get_buffer(); + size_t offset = first.get_index(); + + queue.enqueue_read_buffer(buffer, + offset * sizeof(value_type), + count * sizeof(value_type), + ::boost::addressof(*result)); + + return iterator_plus_distance(result, count); +} + +// copy_to_host() specialization for std::vector<bool> +template<class DeviceIterator> +inline std::vector<bool>::iterator +copy_to_host(DeviceIterator first, + DeviceIterator last, + std::vector<bool>::iterator result, + command_queue &queue) +{ + std::vector<uint8_t> temp(std::distance(first, last)); + copy_to_host(first, last, temp.begin(), queue); + return std::copy(temp.begin(), temp.end(), result); +} + +template<class DeviceIterator, class HostIterator> +inline future<HostIterator> copy_to_host_async(DeviceIterator first, + DeviceIterator last, + HostIterator result, + command_queue &queue) +{ + typedef typename + std::iterator_traits<DeviceIterator>::value_type + value_type; + + size_t count = iterator_range_size(first, last); + if(count == 0){ + return future<HostIterator>(); 
+ } + + const buffer &buffer = first.get_buffer(); + size_t offset = first.get_index(); + + event event_ = + queue.enqueue_read_buffer_async(buffer, + offset * sizeof(value_type), + count * sizeof(value_type), + ::boost::addressof(*result)); + + return make_future(iterator_plus_distance(result, count), event_); +} + +#ifdef CL_VERSION_2_0 +// copy_to_host() specialization for svm_ptr +template<class T, class HostIterator> +inline HostIterator copy_to_host(svm_ptr<T> first, + svm_ptr<T> last, + HostIterator result, + command_queue &queue) +{ + size_t count = iterator_range_size(first, last); + if(count == 0){ + return result; + } + + queue.enqueue_svm_memcpy( + ::boost::addressof(*result), first.get(), count * sizeof(T) + ); + + return result + count; +} + +template<class T, class HostIterator> +inline future<HostIterator> copy_to_host_async(svm_ptr<T> first, + svm_ptr<T> last, + HostIterator result, + command_queue &queue) +{ + size_t count = iterator_range_size(first, last); + if(count == 0){ + return result; + } + + event event_ = queue.enqueue_svm_memcpy_async( + ::boost::addressof(*result), first.get(), count * sizeof(T) + ); + + return make_future(iterator_plus_distance(result, count), event_); +} +#endif // CL_VERSION_2_0 + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_TO_HOST_HPP diff --git a/boost/compute/algorithm/detail/count_if_with_ballot.hpp b/boost/compute/algorithm/detail/count_if_with_ballot.hpp new file mode 100644 index 0000000000..584ef37ab9 --- /dev/null +++ b/boost/compute/algorithm/detail/count_if_with_ballot.hpp @@ -0,0 +1,78 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See 
http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP + +#include <boost/compute/context.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/algorithm/reduce.hpp> +#include <boost/compute/functional/detail/nvidia_ballot.hpp> +#include <boost/compute/functional/detail/nvidia_popcount.hpp> +#include <boost/compute/detail/meta_kernel.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class InputIterator, class Predicate> +inline size_t count_if_with_ballot(InputIterator first, + InputIterator last, + Predicate predicate, + command_queue &queue) +{ + size_t count = iterator_range_size(first, last); + size_t block_size = 32; + size_t block_count = count / block_size; + if(block_count * block_size != count){ + block_count++; + } + + const ::boost::compute::context &context = queue.get_context(); + + ::boost::compute::vector<uint_> counts(block_count, context); + + ::boost::compute::detail::nvidia_popcount<uint_> popc; + ::boost::compute::detail::nvidia_ballot<uint_> ballot; + + meta_kernel k("count_if_with_ballot"); + k << + "const uint gid = get_global_id(0);\n" << + + "bool value = false;\n" << + "if(gid < count)\n" << + " value = " << predicate(first[k.var<const uint_>("gid")]) << ";\n" << + + "uint bits = " << ballot(k.var<const uint_>("value")) << ";\n" << + + "if(get_local_id(0) == 0)\n" << + counts.begin()[k.var<uint_>("get_group_id(0)") ] + << " = " << popc(k.var<uint_>("bits")) << ";\n"; + + k.add_set_arg<const uint_>("count", count); + + k.exec_1d(queue, 0, block_size * block_count, block_size); + + uint_ result; + ::boost::compute::reduce( + counts.begin(), + counts.end(), + &result, + queue + ); + return result; +} + +} // end detail namespace +} // 
end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP diff --git a/boost/compute/algorithm/detail/count_if_with_reduce.hpp b/boost/compute/algorithm/detail/count_if_with_reduce.hpp new file mode 100644 index 0000000000..f9449f4a41 --- /dev/null +++ b/boost/compute/algorithm/detail/count_if_with_reduce.hpp @@ -0,0 +1,87 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_REDUCE_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_REDUCE_HPP + +#include <boost/compute/algorithm/reduce.hpp> +#include <boost/compute/iterator/transform_iterator.hpp> +#include <boost/compute/types/fundamental.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class Predicate, class Arg> +struct invoked_countable_predicate +{ + invoked_countable_predicate(Predicate p, Arg a) + : predicate(p), arg(a) + { + } + + Predicate predicate; + Arg arg; +}; + +template<class Predicate, class Arg> +inline meta_kernel& operator<<(meta_kernel &kernel, + const invoked_countable_predicate<Predicate, Arg> &expr) +{ + return kernel << "(" << expr.predicate(expr.arg) << " ? 
1 : 0)"; +} + +// the countable_predicate wraps Predicate and converts its result from +// bool to ulong so that it can be used with reduce() +template<class Predicate> +struct countable_predicate +{ + typedef ulong_ result_type; + + countable_predicate(Predicate predicate) + : m_predicate(predicate) + { + } + + template<class Arg> + invoked_countable_predicate<Predicate, Arg> operator()(const Arg &arg) const + { + return invoked_countable_predicate<Predicate, Arg>(m_predicate, arg); + } + + Predicate m_predicate; +}; + +// counts the number of elements matching predicate using reduce() +template<class InputIterator, class Predicate> +inline size_t count_if_with_reduce(InputIterator first, + InputIterator last, + Predicate predicate, + command_queue &queue) +{ + countable_predicate<Predicate> reduce_predicate(predicate); + + ulong_ count = 0; + ::boost::compute::reduce( + ::boost::compute::make_transform_iterator(first, reduce_predicate), + ::boost::compute::make_transform_iterator(last, reduce_predicate), + &count, + ::boost::compute::plus<ulong_>(), + queue + ); + + return static_cast<size_t>(count); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_REDUCE_HPP diff --git a/boost/compute/algorithm/detail/count_if_with_threads.hpp b/boost/compute/algorithm/detail/count_if_with_threads.hpp new file mode 100644 index 0000000000..6f282982e0 --- /dev/null +++ b/boost/compute/algorithm/detail/count_if_with_threads.hpp @@ -0,0 +1,129 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP + +#include <numeric> + +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/container/vector.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class InputIterator, class Predicate> +class count_if_with_threads_kernel : meta_kernel +{ +public: + typedef typename + std::iterator_traits<InputIterator>::value_type + value_type; + + count_if_with_threads_kernel() + : meta_kernel("count_if_with_threads") + { + } + + void set_args(InputIterator first, + InputIterator last, + Predicate predicate) + + { + typedef typename std::iterator_traits<InputIterator>::value_type T; + + m_size = detail::iterator_range_size(first, last); + + m_size_arg = add_arg<const ulong_>("size"); + m_counts_arg = add_arg<ulong_ *>(memory_object::global_memory, "counts"); + + *this << + // thread parameters + "const uint gid = get_global_id(0);\n" << + "const uint block_size = size / get_global_size(0);\n" << + "const uint start = block_size * gid;\n" << + "uint end = 0;\n" << + "if(gid == get_global_size(0) - 1)\n" << + " end = size;\n" << + "else\n" << + " end = block_size * gid + block_size;\n" << + + // count values + "uint count = 0;\n" << + "for(uint i = start; i < end; i++){\n" << + decl<const T>("value") << "=" + << first[expr<uint_>("i")] << ";\n" << + if_(predicate(var<const T>("value"))) << "{\n" << + "count++;\n" << + "}\n" << + "}\n" << + + // write count + "counts[gid] = count;\n"; + } + + size_t exec(command_queue &queue) + { + const device &device = queue.get_device(); + const context &context = queue.get_context(); + + size_t threads = device.compute_units(); + + const size_t minimum_block_size = 2048; + if(m_size / threads < minimum_block_size){ + threads = static_cast<size_t>( + (std::max)( + std::ceil(float(m_size) / 
minimum_block_size), + 1.0f + ) + ); + } + + // storage for counts + ::boost::compute::vector<ulong_> counts(threads, context); + + // exec kernel + set_arg(m_size_arg, static_cast<ulong_>(m_size)); + set_arg(m_counts_arg, counts.get_buffer()); + exec_1d(queue, 0, threads, 1); + + // copy counts to the host + std::vector<ulong_> host_counts(threads); + ::boost::compute::copy(counts.begin(), counts.end(), host_counts.begin(), queue); + + // return sum of counts + return std::accumulate(host_counts.begin(), host_counts.end(), size_t(0)); + } + +private: + size_t m_size; + size_t m_size_arg; + size_t m_counts_arg; +}; + +// counts values that match the predicate using one thread per block. this is +// optimized for cpu-type devices with a small number of compute units. +template<class InputIterator, class Predicate> +inline size_t count_if_with_threads(InputIterator first, + InputIterator last, + Predicate predicate, + command_queue &queue) +{ + count_if_with_threads_kernel<InputIterator, Predicate> kernel; + kernel.set_args(first, last, predicate); + return kernel.exec(queue); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP diff --git a/boost/compute/algorithm/detail/find_extrema.hpp b/boost/compute/algorithm/detail/find_extrema.hpp new file mode 100644 index 0000000000..6e756c3904 --- /dev/null +++ b/boost/compute/algorithm/detail/find_extrema.hpp @@ -0,0 +1,64 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_HPP + +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/algorithm/detail/find_extrema_with_reduce.hpp> +#include <boost/compute/algorithm/detail/find_extrema_with_atomics.hpp> +#include <boost/compute/algorithm/detail/serial_find_extrema.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class InputIterator, class Compare> +inline InputIterator find_extrema(InputIterator first, + InputIterator last, + Compare compare, + const bool find_minimum, + command_queue &queue) +{ + size_t count = iterator_range_size(first, last); + + // handle trivial cases + if(count == 0 || count == 1){ + return first; + } + + const device &device = queue.get_device(); + + // use serial method for small inputs + // and when device is a CPU + if(count < 512 || (device.type() & device::cpu)){ + return serial_find_extrema(first, last, compare, find_minimum, queue); + } + + // find_extrema_with_reduce() is used only if requirements are met + if(find_extrema_with_reduce_requirements_met(first, last, queue)) + { + return find_extrema_with_reduce(first, last, compare, find_minimum, queue); + } + + // use serial method for OpenCL version 1.0 due to + // problems with atomic_cmpxchg() + #ifndef CL_VERSION_1_1 + return serial_find_extrema(first, last, compare, find_minimum, queue); + #endif + + return find_extrema_with_atomics(first, last, compare, find_minimum, queue); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_HPP diff --git a/boost/compute/algorithm/detail/find_extrema_with_atomics.hpp b/boost/compute/algorithm/detail/find_extrema_with_atomics.hpp new file mode 100644 index 0000000000..406d1becb7 --- /dev/null +++ 
b/boost/compute/algorithm/detail/find_extrema_with_atomics.hpp @@ -0,0 +1,108 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_WITH_ATOMICS_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_WITH_ATOMICS_HPP + +#include <boost/compute/types.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/container/detail/scalar.hpp> +#include <boost/compute/functional/atomic.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class InputIterator, class Compare> +inline InputIterator find_extrema_with_atomics(InputIterator first, + InputIterator last, + Compare compare, + const bool find_minimum, + command_queue &queue) +{ + typedef typename std::iterator_traits<InputIterator>::value_type value_type; + typedef typename std::iterator_traits<InputIterator>::difference_type difference_type; + + const context &context = queue.get_context(); + + meta_kernel k("find_extrema"); + atomic_cmpxchg<uint_> atomic_cmpxchg_uint; + + k << + "const uint gid = get_global_id(0);\n" << + "uint old_index = *index;\n" << + + k.decl<value_type>("old") << + " = " << first[k.var<uint_>("old_index")] << ";\n" << + k.decl<value_type>("new") << + " = " << first[k.var<uint_>("gid")] << ";\n" << + + k.decl<bool>("compare_result") << ";\n" << + "#ifdef BOOST_COMPUTE_FIND_MAXIMUM\n" << + "while(" << + "(compare_result = " << compare(k.var<value_type>("old"), + k.var<value_type>("new")) << ")" << + " || 
(!(compare_result" << + " || " << compare(k.var<value_type>("new"), + k.var<value_type>("old")) << ") " + "&& gid < old_index)){\n" << + "#else\n" << + // while condition explained for minimum case with less (<) + // as comparison function: + // while(new_value < old_value + // OR (new_value == old_value AND new_index < old_index)) + "while(" << + "(compare_result = " << compare(k.var<value_type>("new"), + k.var<value_type>("old")) << ")" << + " || (!(compare_result" << + " || " << compare(k.var<value_type>("old"), + k.var<value_type>("new")) << ") " + "&& gid < old_index)){\n" << + "#endif\n" << + + " if(" << atomic_cmpxchg_uint(k.var<uint_ *>("index"), + k.var<uint_>("old_index"), + k.var<uint_>("gid")) << " == old_index)\n" << + " break;\n" << + " else\n" << + " old_index = *index;\n" << + "old = " << first[k.var<uint_>("old_index")] << ";\n" << + "}\n"; + + size_t index_arg_index = k.add_arg<uint_ *>(memory_object::global_memory, "index"); + + std::string options; + if(!find_minimum){ + options = "-DBOOST_COMPUTE_FIND_MAXIMUM"; + } + kernel kernel = k.compile(context, options); + + // setup index buffer + scalar<uint_> index(context); + kernel.set_arg(index_arg_index, index.get_buffer()); + + // initialize index + index.write(0, queue); + + // run kernel + size_t count = iterator_range_size(first, last); + queue.enqueue_1d_range_kernel(kernel, 0, count, 0); + + // read index and return iterator + return first + static_cast<difference_type>(index.read(queue)); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_WITH_ATOMICS_HPP diff --git a/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp b/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp new file mode 100644 index 0000000000..1fbb7dee19 --- /dev/null +++ b/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp @@ -0,0 +1,443 @@ 
+//---------------------------------------------------------------------------// +// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_WITH_REDUCE_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_WITH_REDUCE_HPP + +#include <algorithm> + +#include <boost/compute/types.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/allocator/pinned_allocator.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/detail/parameter_cache.hpp> +#include <boost/compute/memory/local_buffer.hpp> +#include <boost/compute/type_traits/type_name.hpp> +#include <boost/compute/utility/program_cache.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class InputIterator> +bool find_extrema_with_reduce_requirements_met(InputIterator first, + InputIterator last, + command_queue &queue) +{ + typedef typename std::iterator_traits<InputIterator>::value_type input_type; + + const device &device = queue.get_device(); + + // device must have dedicated local memory storage + // otherwise reduction would be highly inefficient + if(device.get_info<CL_DEVICE_LOCAL_MEM_TYPE>() != CL_LOCAL) + { + return false; + } + + const size_t max_work_group_size = device.get_info<CL_DEVICE_MAX_WORK_GROUP_SIZE>(); + // local memory size in bytes (per compute unit) + const size_t local_mem_size = device.get_info<CL_DEVICE_LOCAL_MEM_SIZE>(); + + std::string cache_key = std::string("__boost_find_extrema_reduce_") + + 
type_name<input_type>(); + // load parameters + boost::shared_ptr<parameter_cache> parameters = + detail::parameter_cache::get_global_cache(device); + + // Get preferred work group size + size_t work_group_size = parameters->get(cache_key, "wgsize", 256); + + work_group_size = (std::min)(max_work_group_size, work_group_size); + + // local memory size needed to perform parallel reduction + size_t required_local_mem_size = 0; + // indices size + required_local_mem_size += sizeof(uint_) * work_group_size; + // values size + required_local_mem_size += sizeof(input_type) * work_group_size; + + // at least 4 work groups per compute unit otherwise reduction + // would be highly inefficient + return ((required_local_mem_size * 4) <= local_mem_size); +} + +/// \internal_ +/// Algorithm finds the first extremum in given range, i.e., with the lowest +/// index. +/// +/// If \p use_input_idx is false, it's assumed that input data is ordered by +/// increasing index and \p input_idx is not used in the algorithm. 
+template<class InputIterator, class ResultIterator, class Compare> +inline void find_extrema_with_reduce(InputIterator input, + vector<uint_>::iterator input_idx, + size_t count, + ResultIterator result, + vector<uint_>::iterator result_idx, + size_t work_groups_no, + size_t work_group_size, + Compare compare, + const bool find_minimum, + const bool use_input_idx, + command_queue &queue) +{ + typedef typename std::iterator_traits<InputIterator>::value_type input_type; + + const context &context = queue.get_context(); + + meta_kernel k("find_extrema_reduce"); + size_t count_arg = k.add_arg<uint_>("count"); + size_t block_arg = k.add_arg<input_type *>(memory_object::local_memory, "block"); + size_t block_idx_arg = k.add_arg<uint_ *>(memory_object::local_memory, "block_idx"); + + k << + // Work item global id + k.decl<const uint_>("gid") << " = get_global_id(0);\n" << + + // Index of element that will be read from input buffer + k.decl<uint_>("idx") << " = gid;\n" << + + k.decl<input_type>("acc") << ";\n" << + k.decl<uint_>("acc_idx") << ";\n" << + "if(gid < count) {\n" << + // Real index of currently best element + "#ifdef BOOST_COMPUTE_USE_INPUT_IDX\n" << + k.var<uint_>("acc_idx") << " = " << input_idx[k.var<uint_>("idx")] << ";\n" << + "#else\n" << + k.var<uint_>("acc_idx") << " = idx;\n" << + "#endif\n" << + + // Init accumulator with first[get_global_id(0)] + "acc = " << input[k.var<uint_>("idx")] << ";\n" << + "idx += get_global_size(0);\n" << + "}\n" << + + k.decl<bool>("compare_result") << ";\n" << + k.decl<bool>("equal") << ";\n\n" << + "while( idx < count ){\n" << + // Next element + k.decl<input_type>("next") << " = " << input[k.var<uint_>("idx")] << ";\n" << + "#ifdef BOOST_COMPUTE_USE_INPUT_IDX\n" << + k.decl<input_type>("next_idx") << " = " << input_idx[k.var<uint_>("idx")] << ";\n" << + "#endif\n" << + + // Comparison between currently best element (acc) and next element + "#ifdef BOOST_COMPUTE_FIND_MAXIMUM\n" << + "compare_result = " << 
compare(k.var<input_type>("next"), + k.var<input_type>("acc")) << ";\n" << + "# ifdef BOOST_COMPUTE_USE_INPUT_IDX\n" << + "equal = !compare_result && !" << + compare(k.var<input_type>("acc"), + k.var<input_type>("next")) << ";\n" << + "# endif\n" << + "#else\n" << + "compare_result = " << compare(k.var<input_type>("acc"), + k.var<input_type>("next")) << ";\n" << + "# ifdef BOOST_COMPUTE_USE_INPUT_IDX\n" << + "equal = !compare_result && !" << + compare(k.var<input_type>("next"), + k.var<input_type>("acc")) << ";\n" << + "# endif\n" << + "#endif\n" << + + // save the winner + "acc = compare_result ? acc : next;\n" << + "#ifdef BOOST_COMPUTE_USE_INPUT_IDX\n" << + "acc_idx = compare_result ? " << + "acc_idx : " << + "(equal ? min(acc_idx, next_idx) : next_idx);\n" << + "#else\n" << + "acc_idx = compare_result ? acc_idx : idx;\n" << + "#endif\n" << + "idx += get_global_size(0);\n" << + "}\n\n" << + + // Work item local id + k.decl<const uint_>("lid") << " = get_local_id(0);\n" << + "block[lid] = acc;\n" << + "block_idx[lid] = acc_idx;\n" << + "barrier(CLK_LOCAL_MEM_FENCE);\n" << + + k.decl<uint_>("group_offset") << + " = count - (get_local_size(0) * get_group_id(0));\n\n"; + + k << + "#pragma unroll\n" + "for(" << k.decl<uint_>("offset") << " = " << uint_(work_group_size) << " / 2; offset > 0; " << + "offset = offset / 2) {\n" << + "if((lid < offset) && ((lid + offset) < group_offset)) { \n" << + k.decl<input_type>("mine") << " = block[lid];\n" << + k.decl<input_type>("other") << " = block[lid+offset];\n" << + "#ifdef BOOST_COMPUTE_FIND_MAXIMUM\n" << + "compare_result = " << compare(k.var<input_type>("other"), + k.var<input_type>("mine")) << ";\n" << + "equal = !compare_result && !" << + compare(k.var<input_type>("mine"), + k.var<input_type>("other")) << ";\n" << + "#else\n" << + "compare_result = " << compare(k.var<input_type>("mine"), + k.var<input_type>("other")) << ";\n" << + "equal = !compare_result && !" 
<< + compare(k.var<input_type>("other"), + k.var<input_type>("mine")) << ";\n" << + "#endif\n" << + "block[lid] = compare_result ? mine : other;\n" << + k.decl<uint_>("mine_idx") << " = block_idx[lid];\n" << + k.decl<uint_>("other_idx") << " = block_idx[lid+offset];\n" << + "block_idx[lid] = compare_result ? " << + "mine_idx : " << + "(equal ? min(mine_idx, other_idx) : other_idx);\n" << + "}\n" + "barrier(CLK_LOCAL_MEM_FENCE);\n" << + "}\n\n" << + + // write block result to global output + "if(lid == 0){\n" << + result[k.var<uint_>("get_group_id(0)")] << " = block[0];\n" << + result_idx[k.var<uint_>("get_group_id(0)")] << " = block_idx[0];\n" << + "}"; + + std::string options; + if(!find_minimum){ + options = "-DBOOST_COMPUTE_FIND_MAXIMUM"; + } + if(use_input_idx){ + options += " -DBOOST_COMPUTE_USE_INPUT_IDX"; + } + + kernel kernel = k.compile(context, options); + + kernel.set_arg(count_arg, static_cast<uint_>(count)); + kernel.set_arg(block_arg, local_buffer<input_type>(work_group_size)); + kernel.set_arg(block_idx_arg, local_buffer<uint_>(work_group_size)); + + queue.enqueue_1d_range_kernel(kernel, + 0, + work_groups_no * work_group_size, + work_group_size); +} + +template<class InputIterator, class ResultIterator, class Compare> +inline void find_extrema_with_reduce(InputIterator input, + size_t count, + ResultIterator result, + vector<uint_>::iterator result_idx, + size_t work_groups_no, + size_t work_group_size, + Compare compare, + const bool find_minimum, + command_queue &queue) +{ + // dummy will not be used + buffer_iterator<uint_> dummy = result_idx; + return find_extrema_with_reduce( + input, dummy, count, result, result_idx, work_groups_no, + work_group_size, compare, find_minimum, false, queue + ); +} + +template<class InputIterator, class Compare> +InputIterator find_extrema_with_reduce(InputIterator first, + InputIterator last, + Compare compare, + const bool find_minimum, + command_queue &queue) +{ + typedef typename 
std::iterator_traits<InputIterator>::difference_type difference_type; + typedef typename std::iterator_traits<InputIterator>::value_type input_type; + + const context &context = queue.get_context(); + const device &device = queue.get_device(); + + // Getting information about used queue and device + const size_t compute_units_no = device.get_info<CL_DEVICE_MAX_COMPUTE_UNITS>(); + const size_t max_work_group_size = device.get_info<CL_DEVICE_MAX_WORK_GROUP_SIZE>(); + + const size_t count = detail::iterator_range_size(first, last); + + std::string cache_key = std::string("__boost_find_extrema_with_reduce_") + + type_name<input_type>(); + + // load parameters + boost::shared_ptr<parameter_cache> parameters = + detail::parameter_cache::get_global_cache(device); + + // get preferred work group size and preferred number + // of work groups per compute unit + size_t work_group_size = parameters->get(cache_key, "wgsize", 256); + size_t work_groups_per_cu = parameters->get(cache_key, "wgpcu", 100); + + // calculate work group size and number of work groups + work_group_size = (std::min)(max_work_group_size, work_group_size); + size_t work_groups_no = compute_units_no * work_groups_per_cu; + work_groups_no = (std::min)( + work_groups_no, + static_cast<size_t>(std::ceil(float(count) / work_group_size)) + ); + + // phase I: finding candidates for extremum + + // device buffors for extremum candidates and their indices + // each work-group computes its candidate + vector<input_type> candidates(work_groups_no, context); + vector<uint_> candidates_idx(work_groups_no, context); + + // finding candidates for first extremum and their indices + find_extrema_with_reduce( + first, count, candidates.begin(), candidates_idx.begin(), + work_groups_no, work_group_size, compare, find_minimum, queue + ); + + // phase II: finding extremum from among the candidates + + // zero-copy buffers for final result (value and index) + vector<input_type, ::boost::compute::pinned_allocator<input_type> > + 
result(1, context); + vector<uint_, ::boost::compute::pinned_allocator<uint_> > + result_idx(1, context); + + // get extremum from among the candidates + find_extrema_with_reduce( + candidates.begin(), candidates_idx.begin(), work_groups_no, result.begin(), + result_idx.begin(), 1, work_group_size, compare, find_minimum, true, queue + ); + + // mapping extremum index to host + uint_* result_idx_host_ptr = + static_cast<uint_*>( + queue.enqueue_map_buffer( + result_idx.get_buffer(), command_queue::map_read, + 0, sizeof(uint_) + ) + ); + + return first + static_cast<difference_type>(*result_idx_host_ptr); +} + +template<class InputIterator> +InputIterator find_extrema_with_reduce(InputIterator first, + InputIterator last, + ::boost::compute::less< + typename std::iterator_traits< + InputIterator + >::value_type + > + compare, + const bool find_minimum, + command_queue &queue) +{ + typedef typename std::iterator_traits<InputIterator>::difference_type difference_type; + typedef typename std::iterator_traits<InputIterator>::value_type input_type; + + const context &context = queue.get_context(); + const device &device = queue.get_device(); + + // Getting information about used queue and device + const size_t compute_units_no = device.get_info<CL_DEVICE_MAX_COMPUTE_UNITS>(); + const size_t max_work_group_size = device.get_info<CL_DEVICE_MAX_WORK_GROUP_SIZE>(); + + const size_t count = detail::iterator_range_size(first, last); + + std::string cache_key = std::string("__boost_find_extrema_with_reduce_") + + type_name<input_type>(); + + // load parameters + boost::shared_ptr<parameter_cache> parameters = + detail::parameter_cache::get_global_cache(device); + + // get preferred work group size and preferred number + // of work groups per compute unit + size_t work_group_size = parameters->get(cache_key, "wgsize", 256); + size_t work_groups_per_cu = parameters->get(cache_key, "wgpcu", 64); + + // calculate work group size and number of work groups + work_group_size = 
(std::min)(max_work_group_size, work_group_size); + size_t work_groups_no = compute_units_no * work_groups_per_cu; + work_groups_no = (std::min)( + work_groups_no, + static_cast<size_t>(std::ceil(float(count) / work_group_size)) + ); + + // phase I: finding candidates for extremum + + // device buffors for extremum candidates and their indices + // each work-group computes its candidate + // zero-copy buffers are used to eliminate copying data back to host + vector<input_type, ::boost::compute::pinned_allocator<input_type> > + candidates(work_groups_no, context); + vector<uint_, ::boost::compute::pinned_allocator <uint_> > + candidates_idx(work_groups_no, context); + + // finding candidates for first extremum and their indices + find_extrema_with_reduce( + first, count, candidates.begin(), candidates_idx.begin(), + work_groups_no, work_group_size, compare, find_minimum, queue + ); + + // phase II: finding extremum from among the candidates + + // mapping candidates and their indices to host + input_type* candidates_host_ptr = + static_cast<input_type*>( + queue.enqueue_map_buffer( + candidates.get_buffer(), command_queue::map_read, + 0, work_groups_no * sizeof(input_type) + ) + ); + + uint_* candidates_idx_host_ptr = + static_cast<uint_*>( + queue.enqueue_map_buffer( + candidates_idx.get_buffer(), command_queue::map_read, + 0, work_groups_no * sizeof(uint_) + ) + ); + + input_type* i = candidates_host_ptr; + uint_* idx = candidates_idx_host_ptr; + uint_* extremum_idx = idx; + input_type extremum = *candidates_host_ptr; + i++; idx++; + + // find extremum (serial) from among the candidates on host + if(!find_minimum) { + while(idx != (candidates_idx_host_ptr + work_groups_no)) { + input_type next = *i; + bool compare_result = next > extremum; + bool equal = next == extremum; + extremum = compare_result ? next : extremum; + extremum_idx = compare_result ? idx : extremum_idx; + extremum_idx = equal ? ((*extremum_idx < *idx) ? 
extremum_idx : idx) : extremum_idx; + idx++, i++; + } + } + else { + while(idx != (candidates_idx_host_ptr + work_groups_no)) { + input_type next = *i; + bool compare_result = next < extremum; + bool equal = next == extremum; + extremum = compare_result ? next : extremum; + extremum_idx = compare_result ? idx : extremum_idx; + extremum_idx = equal ? ((*extremum_idx < *idx) ? extremum_idx : idx) : extremum_idx; + idx++, i++; + } + } + + return first + static_cast<difference_type>(*extremum_idx); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_EXTREMA_WITH_REDUCE_HPP diff --git a/boost/compute/algorithm/detail/find_if_with_atomics.hpp b/boost/compute/algorithm/detail/find_if_with_atomics.hpp new file mode 100644 index 0000000000..112c34cf00 --- /dev/null +++ b/boost/compute/algorithm/detail/find_if_with_atomics.hpp @@ -0,0 +1,212 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_IF_WITH_ATOMICS_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_IF_WITH_ATOMICS_HPP + +#include <iterator> + +#include <boost/compute/types.hpp> +#include <boost/compute/functional.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/container/detail/scalar.hpp> +#include <boost/compute/iterator/buffer_iterator.hpp> +#include <boost/compute/type_traits/type_name.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/detail/parameter_cache.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class InputIterator, class UnaryPredicate> +inline InputIterator find_if_with_atomics_one_vpt(InputIterator first, + InputIterator last, + UnaryPredicate predicate, + const size_t count, + command_queue &queue) +{ + typedef typename std::iterator_traits<InputIterator>::value_type value_type; + typedef typename std::iterator_traits<InputIterator>::difference_type difference_type; + + const context &context = queue.get_context(); + + detail::meta_kernel k("find_if"); + size_t index_arg = k.add_arg<int *>(memory_object::global_memory, "index"); + atomic_min<uint_> atomic_min_uint; + + k << k.decl<const uint_>("i") << " = get_global_id(0);\n" + << k.decl<const value_type>("value") << "=" + << first[k.var<const uint_>("i")] << ";\n" + << "if(" << predicate(k.var<const value_type>("value")) << "){\n" + << " " << atomic_min_uint(k.var<uint_ *>("index"), k.var<uint_>("i")) << ";\n" + << "}\n"; + + kernel kernel = k.compile(context); + + scalar<uint_> index(context); + kernel.set_arg(index_arg, index.get_buffer()); + + // initialize index to the last iterator's index + index.write(static_cast<uint_>(count), queue); + queue.enqueue_1d_range_kernel(kernel, 0, count, 0); + + // read index and return iterator + return first + 
static_cast<difference_type>(index.read(queue)); +} + +template<class InputIterator, class UnaryPredicate> +inline InputIterator find_if_with_atomics_multiple_vpt(InputIterator first, + InputIterator last, + UnaryPredicate predicate, + const size_t count, + const size_t vpt, + command_queue &queue) +{ + typedef typename std::iterator_traits<InputIterator>::value_type value_type; + typedef typename std::iterator_traits<InputIterator>::difference_type difference_type; + + const context &context = queue.get_context(); + const device &device = queue.get_device(); + + detail::meta_kernel k("find_if"); + size_t index_arg = k.add_arg<uint_ *>(memory_object::global_memory, "index"); + size_t count_arg = k.add_arg<const uint_>("count"); + size_t vpt_arg = k.add_arg<const uint_>("vpt"); + atomic_min<uint_> atomic_min_uint; + + // for GPUs reads from global memory are coalesced + if(device.type() & device::gpu) { + k << + k.decl<const uint_>("lsize") << " = get_local_size(0);\n" << + k.decl<uint_>("id") << " = get_local_id(0) + get_group_id(0) * lsize * vpt;\n" << + k.decl<const uint_>("end") << " = min(" << + "id + (lsize *" << k.var<uint_>("vpt") << ")," << + "count" << + ");\n" << + + // checking if the index is already found + "__local uint local_index;\n" << + "if(get_local_id(0) == 0){\n" << + " local_index = *index;\n " << + "};\n" << + "barrier(CLK_LOCAL_MEM_FENCE);\n" << + "if(local_index < id){\n" << + " return;\n" << + "}\n" << + + "while(id < end){\n" << + " " << k.decl<const value_type>("value") << " = " << + first[k.var<const uint_>("id")] << ";\n" + " if(" << predicate(k.var<const value_type>("value")) << "){\n" << + " " << atomic_min_uint(k.var<uint_ *>("index"), + k.var<uint_>("id")) << ";\n" << + " return;\n" + " }\n" << + " id+=lsize;\n" << + "}\n"; + // for CPUs (and other devices) reads are ordered so the big cache is + // efficiently used. 
+ } else { + k << + k.decl<uint_>("id") << " = get_global_id(0) * " << k.var<uint_>("vpt") << ";\n" << + k.decl<const uint_>("end") << " = min(" << + "id + " << k.var<uint_>("vpt") << "," << + "count" << + ");\n" << + "while(id < end && (*index) > id){\n" << + " " << k.decl<const value_type>("value") << " = " << + first[k.var<const uint_>("id")] << ";\n" + " if(" << predicate(k.var<const value_type>("value")) << "){\n" << + " " << atomic_min_uint(k.var<uint_ *>("index"), + k.var<uint_>("id")) << ";\n" << + " return;\n" << + " }\n" << + " id++;\n" << + "}\n"; + } + + kernel kernel = k.compile(context); + + scalar<uint_> index(context); + kernel.set_arg(index_arg, index.get_buffer()); + kernel.set_arg(count_arg, static_cast<uint_>(count)); + kernel.set_arg(vpt_arg, static_cast<uint_>(vpt)); + + // initialize index to the last iterator's index + index.write(static_cast<uint_>(count), queue); + + const size_t global_wg_size = static_cast<size_t>( + std::ceil(float(count) / vpt) + ); + queue.enqueue_1d_range_kernel(kernel, 0, global_wg_size, 0); + + // read index and return iterator + return first + static_cast<difference_type>(index.read(queue)); +} + +template<class InputIterator, class UnaryPredicate> +inline InputIterator find_if_with_atomics(InputIterator first, + InputIterator last, + UnaryPredicate predicate, + command_queue &queue) +{ + typedef typename std::iterator_traits<InputIterator>::value_type value_type; + + size_t count = detail::iterator_range_size(first, last); + if(count == 0){ + return last; + } + + const device &device = queue.get_device(); + + // load cached parameters + std::string cache_key = std::string("__boost_find_if_with_atomics_") + + type_name<value_type>(); + boost::shared_ptr<parameter_cache> parameters = + detail::parameter_cache::get_global_cache(device); + + // for relatively small inputs on GPUs kernel checking one value per thread + // (work-item) is more efficient than its multiple values per thread version + if(device.type() & 
device::gpu){ + const size_t one_vpt_threshold = + parameters->get(cache_key, "one_vpt_threshold", 1048576); + if(count <= one_vpt_threshold){ + return find_if_with_atomics_one_vpt( + first, last, predicate, count, queue + ); + } + } + + // values per thread + size_t vpt; + if(device.type() & device::gpu){ + // get vpt parameter + vpt = parameters->get(cache_key, "vpt", 32); + } else { + // for CPUs work is split equally between compute units + const size_t max_compute_units = + device.get_info<CL_DEVICE_MAX_COMPUTE_UNITS>(); + vpt = static_cast<size_t>( + std::ceil(float(count) / max_compute_units) + ); + } + + return find_if_with_atomics_multiple_vpt( + first, last, predicate, count, vpt, queue + ); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_FIND_IF_WITH_ATOMICS_HPP diff --git a/boost/compute/algorithm/detail/inplace_reduce.hpp b/boost/compute/algorithm/detail/inplace_reduce.hpp new file mode 100644 index 0000000000..60c61e83fe --- /dev/null +++ b/boost/compute/algorithm/detail/inplace_reduce.hpp @@ -0,0 +1,136 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_INPLACE_REDUCE_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_INPLACE_REDUCE_HPP + +#include <iterator> + +#include <boost/utility/result_of.hpp> + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/memory/local_buffer.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class Iterator, class BinaryFunction> +inline void inplace_reduce(Iterator first, + Iterator last, + BinaryFunction function, + command_queue &queue) +{ + typedef typename + std::iterator_traits<Iterator>::value_type + value_type; + + size_t input_size = iterator_range_size(first, last); + if(input_size < 2){ + return; + } + + const context &context = queue.get_context(); + + size_t block_size = 64; + size_t values_per_thread = 8; + size_t block_count = input_size / (block_size * values_per_thread); + if(block_count * block_size * values_per_thread != input_size) + block_count++; + + vector<value_type> output(block_count, context); + + meta_kernel k("inplace_reduce"); + size_t input_arg = k.add_arg<value_type *>(memory_object::global_memory, "input"); + size_t input_size_arg = k.add_arg<const uint_>("input_size"); + size_t output_arg = k.add_arg<value_type *>(memory_object::global_memory, "output"); + size_t scratch_arg = k.add_arg<value_type *>(memory_object::local_memory, "scratch"); + k << + "const uint gid = get_global_id(0);\n" << + "const uint lid = get_local_id(0);\n" << + "const uint values_per_thread =\n" + << uint_(values_per_thread) << ";\n" << + + // thread reduce + "const uint index = gid * values_per_thread;\n" << + "if(index < input_size){\n" << + k.decl<value_type>("sum") << " = input[index];\n" << + "for(uint i = 1;\n" << + "i < values_per_thread && (index + i) < input_size;\n" << + "i++){\n" << + " sum = " << + 
function(k.var<value_type>("sum"), + k.var<value_type>("input[index+i]")) << ";\n" << + "}\n" << + "scratch[lid] = sum;\n" << + "}\n" << + + // local reduce + "for(uint i = 1; i < get_local_size(0); i <<= 1){\n" << + " barrier(CLK_LOCAL_MEM_FENCE);\n" << + " uint mask = (i << 1) - 1;\n" << + " uint next_index = (gid + i) * values_per_thread;\n" + " if((lid & mask) == 0 && next_index < input_size){\n" << + " scratch[lid] = " << + function(k.var<value_type>("scratch[lid]"), + k.var<value_type>("scratch[lid+i]")) << ";\n" << + " }\n" << + "}\n" << + + // write output for block + "if(lid == 0){\n" << + " output[get_group_id(0)] = scratch[0];\n" << + "}\n" + ; + + const buffer *input_buffer = &first.get_buffer(); + const buffer *output_buffer = &output.get_buffer(); + + kernel kernel = k.compile(context); + + while(input_size > 1){ + kernel.set_arg(input_arg, *input_buffer); + kernel.set_arg(input_size_arg, static_cast<uint_>(input_size)); + kernel.set_arg(output_arg, *output_buffer); + kernel.set_arg(scratch_arg, local_buffer<value_type>(block_size)); + + queue.enqueue_1d_range_kernel(kernel, + 0, + block_count * block_size, + block_size); + + input_size = + static_cast<size_t>( + std::ceil(float(input_size) / (block_size * values_per_thread) + ) + ); + + block_count = input_size / (block_size * values_per_thread); + if(block_count * block_size * values_per_thread != input_size) + block_count++; + + std::swap(input_buffer, output_buffer); + } + + if(input_buffer != &first.get_buffer()){ + ::boost::compute::copy(output.begin(), + output.begin() + 1, + first, + queue); + } +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_INPLACE_REDUCE_HPP diff --git a/boost/compute/algorithm/detail/insertion_sort.hpp b/boost/compute/algorithm/detail/insertion_sort.hpp new file mode 100644 index 0000000000..4b5b95139a --- /dev/null +++ b/boost/compute/algorithm/detail/insertion_sort.hpp @@ -0,0 +1,165 @@ 
+//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_INSERTION_SORT_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_INSERTION_SORT_HPP + +#include <boost/compute/kernel.hpp> +#include <boost/compute/program.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/memory/local_buffer.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class Iterator, class Compare> +inline void serial_insertion_sort(Iterator first, + Iterator last, + Compare compare, + command_queue &queue) +{ + typedef typename std::iterator_traits<Iterator>::value_type T; + + size_t count = iterator_range_size(first, last); + if(count < 2){ + return; + } + + meta_kernel k("serial_insertion_sort"); + size_t local_data_arg = k.add_arg<T *>(memory_object::local_memory, "data"); + size_t count_arg = k.add_arg<uint_>("n"); + + k << + // copy data to local memory + "for(uint i = 0; i < n; i++){\n" << + " data[i] = " << first[k.var<uint_>("i")] << ";\n" + "}\n" + + // sort data in local memory + "for(uint i = 1; i < n; i++){\n" << + " " << k.decl<const T>("value") << " = data[i];\n" << + " uint pos = i;\n" << + " while(pos > 0 && " << + compare(k.var<const T>("value"), + k.var<const T>("data[pos-1]")) << "){\n" << + " data[pos] = data[pos-1];\n" << + " pos--;\n" << + " }\n" << + " data[pos] = value;\n" << + "}\n" << + + // copy sorted data to output + "for(uint i = 0; i < n; i++){\n" << + " " << first[k.var<uint_>("i")] << " = 
data[i];\n" + "}\n"; + + const context &context = queue.get_context(); + ::boost::compute::kernel kernel = k.compile(context); + kernel.set_arg(local_data_arg, local_buffer<T>(count)); + kernel.set_arg(count_arg, static_cast<uint_>(count)); + + queue.enqueue_task(kernel); +} + +template<class Iterator> +inline void serial_insertion_sort(Iterator first, + Iterator last, + command_queue &queue) +{ + typedef typename std::iterator_traits<Iterator>::value_type T; + + ::boost::compute::less<T> less; + + return serial_insertion_sort(first, last, less, queue); +} + +template<class KeyIterator, class ValueIterator, class Compare> +inline void serial_insertion_sort_by_key(KeyIterator keys_first, + KeyIterator keys_last, + ValueIterator values_first, + Compare compare, + command_queue &queue) +{ + typedef typename std::iterator_traits<KeyIterator>::value_type key_type; + typedef typename std::iterator_traits<ValueIterator>::value_type value_type; + + size_t count = iterator_range_size(keys_first, keys_last); + if(count < 2){ + return; + } + + meta_kernel k("serial_insertion_sort_by_key"); + size_t local_keys_arg = k.add_arg<key_type *>(memory_object::local_memory, "keys"); + size_t local_data_arg = k.add_arg<value_type *>(memory_object::local_memory, "data"); + size_t count_arg = k.add_arg<uint_>("n"); + + k << + // copy data to local memory + "for(uint i = 0; i < n; i++){\n" << + " keys[i] = " << keys_first[k.var<uint_>("i")] << ";\n" + " data[i] = " << values_first[k.var<uint_>("i")] << ";\n" + "}\n" + + // sort data in local memory + "for(uint i = 1; i < n; i++){\n" << + " " << k.decl<const key_type>("key") << " = keys[i];\n" << + " " << k.decl<const value_type>("value") << " = data[i];\n" << + " uint pos = i;\n" << + " while(pos > 0 && " << + compare(k.var<const key_type>("key"), + k.var<const key_type>("keys[pos-1]")) << "){\n" << + " keys[pos] = keys[pos-1];\n" << + " data[pos] = data[pos-1];\n" << + " pos--;\n" << + " }\n" << + " keys[pos] = key;\n" << + " data[pos] = 
value;\n" << + "}\n" << + + // copy sorted data to output + "for(uint i = 0; i < n; i++){\n" << + " " << keys_first[k.var<uint_>("i")] << " = keys[i];\n" + " " << values_first[k.var<uint_>("i")] << " = data[i];\n" + "}\n"; + + const context &context = queue.get_context(); + ::boost::compute::kernel kernel = k.compile(context); + kernel.set_arg(local_keys_arg, static_cast<uint_>(count * sizeof(key_type)), 0); + kernel.set_arg(local_data_arg, static_cast<uint_>(count * sizeof(value_type)), 0); + kernel.set_arg(count_arg, static_cast<uint_>(count)); + + queue.enqueue_task(kernel); +} + +template<class KeyIterator, class ValueIterator> +inline void serial_insertion_sort_by_key(KeyIterator keys_first, + KeyIterator keys_last, + ValueIterator values_first, + command_queue &queue) +{ + typedef typename std::iterator_traits<KeyIterator>::value_type key_type; + + serial_insertion_sort_by_key( + keys_first, + keys_last, + values_first, + boost::compute::less<key_type>(), + queue + ); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_INSERTION_SORT_HPP diff --git a/boost/compute/algorithm/detail/merge_path.hpp b/boost/compute/algorithm/detail/merge_path.hpp new file mode 100644 index 0000000000..bc2c8fa88c --- /dev/null +++ b/boost/compute/algorithm/detail/merge_path.hpp @@ -0,0 +1,116 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_PATH_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_PATH_HPP + +#include <iterator> + +#include <boost/compute/algorithm/find_if.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/lambda.hpp> +#include <boost/compute/system.hpp> + +namespace boost { +namespace compute { +namespace detail { + +/// +/// \brief Merge Path kernel class +/// +/// Subclass of meta_kernel to break two sets into tiles according +/// to their merge path +/// +class merge_path_kernel : public meta_kernel +{ +public: + unsigned int tile_size; + + merge_path_kernel() : meta_kernel("merge_path") + { + tile_size = 4; + } + + template<class InputIterator1, class InputIterator2, + class OutputIterator1, class OutputIterator2, + class Compare> + void set_range(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator1 result_a, + OutputIterator2 result_b, + Compare comp) + { + m_a_count = iterator_range_size(first1, last1); + m_a_count_arg = add_arg<uint_>("a_count"); + + m_b_count = iterator_range_size(first2, last2); + m_b_count_arg = add_arg<uint_>("b_count"); + + *this << + "uint i = get_global_id(0);\n" << + "uint target = (i+1)*" << tile_size << ";\n" << + "uint start = max(convert_int(0),convert_int(target)-convert_int(b_count));\n" << + "uint end = min(target,a_count);\n" << + "uint a_index, b_index;\n" << + "while(start<end)\n" << + "{\n" << + " a_index = (start + end)/2;\n" << + " b_index = target - a_index - 1;\n" << + " if(!(" << comp(first2[expr<uint_>("b_index")], + first1[expr<uint_>("a_index")]) << "))\n" << + " start = a_index + 1;\n" << + " else end = a_index;\n" << + "}\n" << + result_a[expr<uint_>("i")] << " = start;\n" << + result_b[expr<uint_>("i")] << " = target 
- start;\n"; + } + + template<class InputIterator1, class InputIterator2, + class OutputIterator1, class OutputIterator2> + void set_range(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator1 result_a, + OutputIterator2 result_b) + { + typedef typename std::iterator_traits<InputIterator1>::value_type value_type; + ::boost::compute::less<value_type> less_than; + set_range(first1, last1, first2, last2, result_a, result_b, less_than); + } + + event exec(command_queue &queue) + { + if((m_a_count + m_b_count)/tile_size == 0) { + return event(); + } + + set_arg(m_a_count_arg, uint_(m_a_count)); + set_arg(m_b_count_arg, uint_(m_b_count)); + + return exec_1d(queue, 0, (m_a_count + m_b_count)/tile_size); + } + +private: + size_t m_a_count; + size_t m_a_count_arg; + size_t m_b_count; + size_t m_b_count_arg; +}; + +} //end detail namespace +} //end compute namespace +} //end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_PATH_HPP diff --git a/boost/compute/algorithm/detail/merge_sort_on_cpu.hpp b/boost/compute/algorithm/detail/merge_sort_on_cpu.hpp new file mode 100644 index 0000000000..f4b53f10ae --- /dev/null +++ b/boost/compute/algorithm/detail/merge_sort_on_cpu.hpp @@ -0,0 +1,366 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_SORT_ON_CPU_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_SORT_ON_CPU_HPP + +#include <boost/compute/kernel.hpp> +#include <boost/compute/program.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/detail/merge_with_merge_path.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class KeyIterator, class ValueIterator, class Compare> +inline void merge_blocks(KeyIterator keys_first, + ValueIterator values_first, + KeyIterator keys_result, + ValueIterator values_result, + Compare compare, + size_t count, + const size_t block_size, + const bool sort_by_key, + command_queue &queue) +{ + (void) values_result; + (void) values_first; + + meta_kernel k("merge_sort_on_cpu_merge_blocks"); + size_t count_arg = k.add_arg<const uint_>("count"); + size_t block_size_arg = k.add_arg<uint_>("block_size"); + + k << + k.decl<uint_>("b1_start") << " = get_global_id(0) * block_size * 2;\n" << + k.decl<uint_>("b1_end") << " = min(count, b1_start + block_size);\n" << + k.decl<uint_>("b2_start") << " = min(count, b1_start + block_size);\n" << + k.decl<uint_>("b2_end") << " = min(count, b2_start + block_size);\n" << + k.decl<uint_>("result_idx") << " = b1_start;\n" << + + // merging block 1 and block 2 (stable) + "while(b1_start < b1_end && b2_start < b2_end){\n" << + " if( " << compare(keys_first[k.var<uint_>("b2_start")], + keys_first[k.var<uint_>("b1_start")]) << "){\n" << + " " << keys_result[k.var<uint_>("result_idx")] << " = " << + keys_first[k.var<uint_>("b2_start")] << ";\n"; + if(sort_by_key){ + k << + " " << values_result[k.var<uint_>("result_idx")] << " = " << + values_first[k.var<uint_>("b2_start")] << ";\n"; + } + k << + " b2_start++;\n" 
<< + " }\n" << + " else {\n" << + " " << keys_result[k.var<uint_>("result_idx")] << " = " << + keys_first[k.var<uint_>("b1_start")] << ";\n"; + if(sort_by_key){ + k << + " " << values_result[k.var<uint_>("result_idx")] << " = " << + values_first[k.var<uint_>("b1_start")] << ";\n"; + } + k << + " b1_start++;\n" << + " }\n" << + " result_idx++;\n" << + "}\n" << + "while(b1_start < b1_end){\n" << + " " << keys_result[k.var<uint_>("result_idx")] << " = " << + keys_first[k.var<uint_>("b1_start")] << ";\n"; + if(sort_by_key){ + k << + " " << values_result[k.var<uint_>("result_idx")] << " = " << + values_first[k.var<uint_>("b1_start")] << ";\n"; + } + k << + " b1_start++;\n" << + " result_idx++;\n" << + "}\n" << + "while(b2_start < b2_end){\n" << + " " << keys_result[k.var<uint_>("result_idx")] << " = " << + keys_first[k.var<uint_>("b2_start")] << ";\n"; + if(sort_by_key){ + k << + " " << values_result[k.var<uint_>("result_idx")] << " = " << + values_first[k.var<uint_>("b2_start")] << ";\n"; + } + k << + " b2_start++;\n" << + " result_idx++;\n" << + "}\n"; + + const context &context = queue.get_context(); + ::boost::compute::kernel kernel = k.compile(context); + kernel.set_arg(count_arg, static_cast<const uint_>(count)); + kernel.set_arg(block_size_arg, static_cast<uint_>(block_size)); + + const size_t global_size = static_cast<size_t>( + std::ceil(float(count) / (2 * block_size)) + ); + queue.enqueue_1d_range_kernel(kernel, 0, global_size, 0); +} + +template<class Iterator, class Compare> +inline void merge_blocks(Iterator first, + Iterator result, + Compare compare, + size_t count, + const size_t block_size, + const bool sort_by_key, + command_queue &queue) +{ + // dummy iterator as it's not sort by key + Iterator dummy; + merge_blocks(first, dummy, result, dummy, compare, count, block_size, false, queue); +} + +template<class Iterator, class Compare> +inline void dispatch_merge_blocks(Iterator first, + Iterator result, + Compare compare, + size_t count, + const size_t 
block_size, + const size_t input_size_threshold, + const size_t blocks_no_threshold, + command_queue &queue) +{ + const size_t blocks_no = static_cast<size_t>( + std::ceil(float(count) / block_size) + ); + // merge with merge path should used only for the large arrays and at the + // end of merging part when there are only a few big blocks left to be merged + if(blocks_no <= blocks_no_threshold && count >= input_size_threshold){ + Iterator last = first + count; + for(size_t i = 0; i < count; i+= 2*block_size) + { + Iterator first1 = (std::min)(first + i, last); + Iterator last1 = (std::min)(first1 + block_size, last); + Iterator first2 = last1; + Iterator last2 = (std::min)(first2 + block_size, last); + Iterator block_result = (std::min)(result + i, result + count); + merge_with_merge_path(first1, last1, first2, last2, + block_result, compare, queue); + } + } + else { + merge_blocks(first, result, compare, count, block_size, false, queue); + } +} + +template<class KeyIterator, class ValueIterator, class Compare> +inline void block_insertion_sort(KeyIterator keys_first, + ValueIterator values_first, + Compare compare, + const size_t count, + const size_t block_size, + const bool sort_by_key, + command_queue &queue) +{ + (void) values_first; + + typedef typename std::iterator_traits<KeyIterator>::value_type K; + typedef typename std::iterator_traits<ValueIterator>::value_type T; + + meta_kernel k("merge_sort_on_cpu_block_insertion_sort"); + size_t count_arg = k.add_arg<uint_>("count"); + size_t block_size_arg = k.add_arg<uint_>("block_size"); + + k << + k.decl<uint_>("start") << " = get_global_id(0) * block_size;\n" << + k.decl<uint_>("end") << " = min(count, start + block_size);\n" << + + // block insertion sort (stable) + "for(uint i = start+1; i < end; i++){\n" << + " " << k.decl<const K>("key") << " = " << + keys_first[k.var<uint_>("i")] << ";\n"; + if(sort_by_key){ + k << + " " << k.decl<const T>("value") << " = " << + values_first[k.var<uint_>("i")] << ";\n"; + 
} + k << + " uint pos = i;\n" << + " while(pos > start && " << + compare(k.var<const K>("key"), + keys_first[k.var<uint_>("pos-1")]) << "){\n" << + " " << keys_first[k.var<uint_>("pos")] << " = " << + keys_first[k.var<uint_>("pos-1")] << ";\n"; + if(sort_by_key){ + k << + " " << values_first[k.var<uint_>("pos")] << " = " << + values_first[k.var<uint_>("pos-1")] << ";\n"; + } + k << + " pos--;\n" << + " }\n" << + " " << keys_first[k.var<uint_>("pos")] << " = key;\n"; + if(sort_by_key) { + k << + " " << values_first[k.var<uint_>("pos")] << " = value;\n"; + } + k << + "}\n"; // block insertion sort + + const context &context = queue.get_context(); + ::boost::compute::kernel kernel = k.compile(context); + kernel.set_arg(count_arg, static_cast<uint_>(count)); + kernel.set_arg(block_size_arg, static_cast<uint_>(block_size)); + + const size_t global_size = static_cast<size_t>(std::ceil(float(count) / block_size)); + queue.enqueue_1d_range_kernel(kernel, 0, global_size, 0); +} + +template<class Iterator, class Compare> +inline void block_insertion_sort(Iterator first, + Compare compare, + const size_t count, + const size_t block_size, + command_queue &queue) +{ + // dummy iterator as it's not sort by key + Iterator dummy; + block_insertion_sort(first, dummy, compare, count, block_size, false, queue); +} + +// This sort is stable. 
+template<class Iterator, class Compare> +inline void merge_sort_on_cpu(Iterator first, + Iterator last, + Compare compare, + command_queue &queue) +{ + typedef typename std::iterator_traits<Iterator>::value_type value_type; + + size_t count = iterator_range_size(first, last); + if(count < 2){ + return; + } + // for small input size only insertion sort is performed + else if(count <= 512){ + block_insertion_sort(first, compare, count, count, queue); + return; + } + + const context &context = queue.get_context(); + const device &device = queue.get_device(); + + // loading parameters + std::string cache_key = + std::string("__boost_merge_sort_on_cpu_") + type_name<value_type>(); + boost::shared_ptr<parameter_cache> parameters = + detail::parameter_cache::get_global_cache(device); + + // When there is merge_with_path_blocks_no_threshold or less blocks left to + // merge AND input size is merge_with_merge_path_input_size_threshold or more + // merge_with_merge_path() algorithm is used to merge sorted blocks; + // otherwise merge_blocks() is used. 
+ const size_t merge_with_path_blocks_no_threshold = + parameters->get(cache_key, "merge_with_merge_path_blocks_no_threshold", 8); + const size_t merge_with_path_input_size_threshold = + parameters->get(cache_key, "merge_with_merge_path_input_size_threshold", 2097152); + + const size_t block_size = + parameters->get(cache_key, "insertion_sort_block_size", 64); + block_insertion_sort(first, compare, count, block_size, queue); + + // temporary buffer for merge result + vector<value_type> temp(count, context); + bool result_in_temporary_buffer = false; + + for(size_t i = block_size; i < count; i *= 2){ + result_in_temporary_buffer = !result_in_temporary_buffer; + if(result_in_temporary_buffer) { + dispatch_merge_blocks(first, temp.begin(), compare, count, i, + merge_with_path_input_size_threshold, + merge_with_path_blocks_no_threshold, + queue); + } else { + dispatch_merge_blocks(temp.begin(), first, compare, count, i, + merge_with_path_input_size_threshold, + merge_with_path_blocks_no_threshold, + queue); + } + } + + if(result_in_temporary_buffer) { + copy(temp.begin(), temp.end(), first, queue); + } +} + +// This sort is stable. 
+template<class KeyIterator, class ValueIterator, class Compare> +inline void merge_sort_by_key_on_cpu(KeyIterator keys_first, + KeyIterator keys_last, + ValueIterator values_first, + Compare compare, + command_queue &queue) +{ + typedef typename std::iterator_traits<KeyIterator>::value_type key_type; + typedef typename std::iterator_traits<ValueIterator>::value_type value_type; + + size_t count = iterator_range_size(keys_first, keys_last); + if(count < 2){ + return; + } + // for small input size only insertion sort is performed + else if(count <= 512){ + block_insertion_sort(keys_first, values_first, compare, + count, count, true, queue); + return; + } + + const context &context = queue.get_context(); + const device &device = queue.get_device(); + + // loading parameters + std::string cache_key = + std::string("__boost_merge_sort_by_key_on_cpu_") + type_name<value_type>() + + "_with_" + type_name<key_type>(); + boost::shared_ptr<parameter_cache> parameters = + detail::parameter_cache::get_global_cache(device); + + const size_t block_size = + parameters->get(cache_key, "insertion_sort_by_key_block_size", 64); + block_insertion_sort(keys_first, values_first, compare, + count, block_size, true, queue); + + // temporary buffer for merge results + vector<value_type> values_temp(count, context); + vector<key_type> keys_temp(count, context); + bool result_in_temporary_buffer = false; + + for(size_t i = block_size; i < count; i *= 2){ + result_in_temporary_buffer = !result_in_temporary_buffer; + if(result_in_temporary_buffer) { + merge_blocks(keys_first, values_first, + keys_temp.begin(), values_temp.begin(), + compare, count, i, true, queue); + } else { + merge_blocks(keys_temp.begin(), values_temp.begin(), + keys_first, values_first, + compare, count, i, true, queue); + } + } + + if(result_in_temporary_buffer) { + copy(keys_temp.begin(), keys_temp.end(), keys_first, queue); + copy(values_temp.begin(), values_temp.end(), values_first, queue); + } +} + +} // end detail 
namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_SORT_ON_CPU_HPP diff --git a/boost/compute/algorithm/detail/merge_with_merge_path.hpp b/boost/compute/algorithm/detail/merge_with_merge_path.hpp new file mode 100644 index 0000000000..c3cc5e8e9c --- /dev/null +++ b/boost/compute/algorithm/detail/merge_with_merge_path.hpp @@ -0,0 +1,203 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_WIH_MERGE_PATH_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_WIH_MERGE_PATH_HPP + +#include <iterator> + +#include <boost/compute/algorithm/detail/merge_path.hpp> +#include <boost/compute/algorithm/fill_n.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/system.hpp> + +namespace boost { +namespace compute { +namespace detail { + +/// +/// \brief Serial merge kernel class +/// +/// Subclass of meta_kernel to perform serial merge after tiling +/// +class serial_merge_kernel : meta_kernel +{ +public: + unsigned int tile_size; + + serial_merge_kernel() : meta_kernel("merge") + { + tile_size = 4; + } + + template<class InputIterator1, class InputIterator2, + class InputIterator3, class InputIterator4, + class OutputIterator, class Compare> + void set_range(InputIterator1 first1, + InputIterator2 first2, + InputIterator3 tile_first1, + InputIterator3 tile_last1, + InputIterator4 tile_first2, + OutputIterator result, + Compare comp) + { + m_count = 
iterator_range_size(tile_first1, tile_last1) - 1; + + *this << + "uint i = get_global_id(0);\n" << + "uint start1 = " << tile_first1[expr<uint_>("i")] << ";\n" << + "uint end1 = " << tile_first1[expr<uint_>("i+1")] << ";\n" << + "uint start2 = " << tile_first2[expr<uint_>("i")] << ";\n" << + "uint end2 = " << tile_first2[expr<uint_>("i+1")] << ";\n" << + "uint index = i*" << tile_size << ";\n" << + "while(start1<end1 && start2<end2)\n" << + "{\n" << + " if(!(" << comp(first2[expr<uint_>("start2")], + first1[expr<uint_>("start1")]) << "))\n" << + " {\n" << + result[expr<uint_>("index")] << + " = " << first1[expr<uint_>("start1")] << ";\n" << + " index++;\n" << + " start1++;\n" << + " }\n" << + " else\n" << + " {\n" << + result[expr<uint_>("index")] << + " = " << first2[expr<uint_>("start2")] << ";\n" << + " index++;\n" << + " start2++;\n" << + " }\n" << + "}\n" << + "while(start1<end1)\n" << + "{\n" << + result[expr<uint_>("index")] << + " = " << first1[expr<uint_>("start1")] << ";\n" << + " index++;\n" << + " start1++;\n" << + "}\n" << + "while(start2<end2)\n" << + "{\n" << + result[expr<uint_>("index")] << + " = " << first2[expr<uint_>("start2")] << ";\n" << + " index++;\n" << + " start2++;\n" << + "}\n"; + } + + template<class InputIterator1, class InputIterator2, + class InputIterator3, class InputIterator4, + class OutputIterator> + void set_range(InputIterator1 first1, + InputIterator2 first2, + InputIterator3 tile_first1, + InputIterator3 tile_last1, + InputIterator4 tile_first2, + OutputIterator result) + { + typedef typename std::iterator_traits<InputIterator1>::value_type value_type; + ::boost::compute::less<value_type> less_than; + set_range(first1, first2, tile_first1, tile_last1, tile_first2, result, less_than); + } + + event exec(command_queue &queue) + { + if(m_count == 0) { + return event(); + } + + return exec_1d(queue, 0, m_count); + } + +private: + size_t m_count; +}; + +/// +/// \brief Merge algorithm with merge path +/// +/// Merges the sorted 
values in the range [\p first1, \p last1) with +/// the sorted values in the range [\p first2, last2) and stores the +/// result in the range beginning at \p result +/// +/// \param first1 Iterator pointing to start of first set +/// \param last1 Iterator pointing to end of first set +/// \param first2 Iterator pointing to start of second set +/// \param last2 Iterator pointing to end of second set +/// \param result Iterator pointing to start of range in which the result +/// will be stored +/// \param comp Comparator which performs less than function +/// \param queue Queue on which to execute +/// +template<class InputIterator1, class InputIterator2, class OutputIterator, class Compare> +inline OutputIterator +merge_with_merge_path(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + Compare comp, + command_queue &queue = system::default_queue()) +{ + typedef typename + std::iterator_traits<OutputIterator>::difference_type result_difference_type; + + size_t tile_size = 1024; + + size_t count1 = iterator_range_size(first1, last1); + size_t count2 = iterator_range_size(first2, last2); + + vector<uint_> tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); + vector<uint_> tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); + + // Tile the sets + merge_path_kernel tiling_kernel; + tiling_kernel.tile_size = static_cast<unsigned int>(tile_size); + tiling_kernel.set_range(first1, last1, first2, last2, + tile_a.begin()+1, tile_b.begin()+1, comp); + fill_n(tile_a.begin(), 1, uint_(0), queue); + fill_n(tile_b.begin(), 1, uint_(0), queue); + tiling_kernel.exec(queue); + + fill_n(tile_a.end()-1, 1, static_cast<uint_>(count1), queue); + fill_n(tile_b.end()-1, 1, static_cast<uint_>(count2), queue); + + // Merge + serial_merge_kernel merge_kernel; + merge_kernel.tile_size = static_cast<unsigned int>(tile_size); + merge_kernel.set_range(first1, first2, tile_a.begin(), 
tile_a.end(), + tile_b.begin(), result, comp); + + merge_kernel.exec(queue); + + return result + static_cast<result_difference_type>(count1 + count2); +} + +/// \overload +template<class InputIterator1, class InputIterator2, class OutputIterator> +inline OutputIterator +merge_with_merge_path(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator1>::value_type value_type; + ::boost::compute::less<value_type> less_than; + return merge_with_merge_path(first1, last1, first2, last2, result, less_than, queue); +} + +} //end detail namespace +} //end compute namespace +} //end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_WIH_MERGE_PATH_HPP diff --git a/boost/compute/algorithm/detail/radix_sort.hpp b/boost/compute/algorithm/detail/radix_sort.hpp new file mode 100644 index 0000000000..c2ba4ed17c --- /dev/null +++ b/boost/compute/algorithm/detail/radix_sort.hpp @@ -0,0 +1,415 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_RADIX_SORT_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_RADIX_SORT_HPP + +#include <iterator> + +#include <boost/assert.hpp> +#include <boost/type_traits/is_signed.hpp> +#include <boost/type_traits/is_floating_point.hpp> + +#include <boost/compute/kernel.hpp> +#include <boost/compute/program.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/exclusive_scan.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/detail/parameter_cache.hpp> +#include <boost/compute/type_traits/type_name.hpp> +#include <boost/compute/type_traits/is_fundamental.hpp> +#include <boost/compute/type_traits/is_vector_type.hpp> +#include <boost/compute/utility/program_cache.hpp> + +namespace boost { +namespace compute { +namespace detail { + +// meta-function returning true if type T is radix-sortable +template<class T> +struct is_radix_sortable : + boost::mpl::and_< + typename ::boost::compute::is_fundamental<T>::type, + typename boost::mpl::not_<typename is_vector_type<T>::type>::type + > +{ +}; + +template<size_t N> +struct radix_sort_value_type +{ +}; + +template<> +struct radix_sort_value_type<1> +{ + typedef uchar_ type; +}; + +template<> +struct radix_sort_value_type<2> +{ + typedef ushort_ type; +}; + +template<> +struct radix_sort_value_type<4> +{ + typedef uint_ type; +}; + +template<> +struct radix_sort_value_type<8> +{ + typedef ulong_ type; +}; + +template<typename T> +inline const char* enable_double() +{ + return " -DT2_double=0"; +} + +template<> +inline const char* enable_double<double>() +{ + return " -DT2_double=1"; +} + +const char radix_sort_source[] = +"#if T2_double\n" +"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" +"#endif\n" +"#define K2_BITS (1 << K_BITS)\n" +"#define RADIX_MASK ((((T)(1)) << K_BITS) - 1)\n" +"#define SIGN_BIT 
((sizeof(T) * CHAR_BIT) - 1)\n" + +"inline uint radix(const T x, const uint low_bit)\n" +"{\n" +"#if defined(IS_FLOATING_POINT)\n" +" const T mask = -(x >> SIGN_BIT) | (((T)(1)) << SIGN_BIT);\n" +" return ((x ^ mask) >> low_bit) & RADIX_MASK;\n" +"#elif defined(IS_SIGNED)\n" +" return ((x ^ (((T)(1)) << SIGN_BIT)) >> low_bit) & RADIX_MASK;\n" +"#else\n" +" return (x >> low_bit) & RADIX_MASK;\n" +"#endif\n" +"}\n" + +"__kernel void count(__global const T *input,\n" +" const uint input_offset,\n" +" const uint input_size,\n" +" __global uint *global_counts,\n" +" __global uint *global_offsets,\n" +" __local uint *local_counts,\n" +" const uint low_bit)\n" +"{\n" + // work-item parameters +" const uint gid = get_global_id(0);\n" +" const uint lid = get_local_id(0);\n" + + // zero local counts +" if(lid < K2_BITS){\n" +" local_counts[lid] = 0;\n" +" }\n" +" barrier(CLK_LOCAL_MEM_FENCE);\n" + + // reduce local counts +" if(gid < input_size){\n" +" T value = input[input_offset+gid];\n" +" uint bucket = radix(value, low_bit);\n" +" atomic_inc(local_counts + bucket);\n" +" }\n" +" barrier(CLK_LOCAL_MEM_FENCE);\n" + + // write block-relative offsets +" if(lid < K2_BITS){\n" +" global_counts[K2_BITS*get_group_id(0) + lid] = local_counts[lid];\n" + + // write global offsets +" if(get_group_id(0) == (get_num_groups(0) - 1)){\n" +" global_offsets[lid] = local_counts[lid];\n" +" }\n" +" }\n" +"}\n" + +"__kernel void scan(__global const uint *block_offsets,\n" +" __global uint *global_offsets,\n" +" const uint block_count)\n" +"{\n" +" __global const uint *last_block_offsets =\n" +" block_offsets + K2_BITS * (block_count - 1);\n" + + // calculate and scan global_offsets +" uint sum = 0;\n" +" for(uint i = 0; i < K2_BITS; i++){\n" +" uint x = global_offsets[i] + last_block_offsets[i];\n" +" global_offsets[i] = sum;\n" +" sum += x;\n" +" }\n" +"}\n" + +"__kernel void scatter(__global const T *input,\n" +" const uint input_offset,\n" +" const uint input_size,\n" +" const uint 
low_bit,\n" +" __global const uint *counts,\n" +" __global const uint *global_offsets,\n" +"#ifndef SORT_BY_KEY\n" +" __global T *output,\n" +" const uint output_offset)\n" +"#else\n" +" __global T *keys_output,\n" +" const uint keys_output_offset,\n" +" __global T2 *values_input,\n" +" const uint values_input_offset,\n" +" __global T2 *values_output,\n" +" const uint values_output_offset)\n" +"#endif\n" +"{\n" + // work-item parameters +" const uint gid = get_global_id(0);\n" +" const uint lid = get_local_id(0);\n" + + // copy input to local memory +" T value;\n" +" uint bucket;\n" +" __local uint local_input[BLOCK_SIZE];\n" +" if(gid < input_size){\n" +" value = input[input_offset+gid];\n" +" bucket = radix(value, low_bit);\n" +" local_input[lid] = bucket;\n" +" }\n" + + // copy block counts to local memory +" __local uint local_counts[(1 << K_BITS)];\n" +" if(lid < K2_BITS){\n" +" local_counts[lid] = counts[get_group_id(0) * K2_BITS + lid];\n" +" }\n" + + // wait until local memory is ready +" barrier(CLK_LOCAL_MEM_FENCE);\n" + +" if(gid >= input_size){\n" +" return;\n" +" }\n" + + // get global offset +" uint offset = global_offsets[bucket] + local_counts[bucket];\n" + + // calculate local offset +" uint local_offset = 0;\n" +" for(uint i = 0; i < lid; i++){\n" +" if(local_input[i] == bucket)\n" +" local_offset++;\n" +" }\n" + +"#ifndef SORT_BY_KEY\n" + // write value to output +" output[output_offset + offset + local_offset] = value;\n" +"#else\n" + // write key and value if doing sort_by_key +" keys_output[keys_output_offset+offset + local_offset] = value;\n" +" values_output[values_output_offset+offset + local_offset] =\n" +" values_input[values_input_offset+gid];\n" +"#endif\n" +"}\n"; + +template<class T, class T2> +inline void radix_sort_impl(const buffer_iterator<T> first, + const buffer_iterator<T> last, + const buffer_iterator<T2> values_first, + command_queue &queue) +{ + + typedef T value_type; + typedef typename 
radix_sort_value_type<sizeof(T)>::type sort_type; + + const device &device = queue.get_device(); + const context &context = queue.get_context(); + + + // if we have a valid values iterator then we are doing a + // sort by key and have to set up the values buffer + bool sort_by_key = (values_first.get_buffer().get() != 0); + + // load (or create) radix sort program + std::string cache_key = + std::string("__boost_radix_sort_") + type_name<value_type>(); + + if(sort_by_key){ + cache_key += std::string("_with_") + type_name<T2>(); + } + + boost::shared_ptr<program_cache> cache = + program_cache::get_global_cache(context); + boost::shared_ptr<parameter_cache> parameters = + detail::parameter_cache::get_global_cache(device); + + // sort parameters + const uint_ k = parameters->get(cache_key, "k", 4); + const uint_ k2 = 1 << k; + const uint_ block_size = parameters->get(cache_key, "tpb", 128); + + // sort program compiler options + std::stringstream options; + options << "-DK_BITS=" << k; + options << " -DT=" << type_name<sort_type>(); + options << " -DBLOCK_SIZE=" << block_size; + + if(boost::is_floating_point<value_type>::value){ + options << " -DIS_FLOATING_POINT"; + } + + if(boost::is_signed<value_type>::value){ + options << " -DIS_SIGNED"; + } + + if(sort_by_key){ + options << " -DSORT_BY_KEY"; + options << " -DT2=" << type_name<T2>(); + options << enable_double<T2>(); + } + + // load radix sort program + program radix_sort_program = cache->get_or_build( + cache_key, options.str(), radix_sort_source, context + ); + + kernel count_kernel(radix_sort_program, "count"); + kernel scan_kernel(radix_sort_program, "scan"); + kernel scatter_kernel(radix_sort_program, "scatter"); + + size_t count = detail::iterator_range_size(first, last); + + uint_ block_count = static_cast<uint_>(count / block_size); + if(block_count * block_size != count){ + block_count++; + } + + // setup temporary buffers + vector<value_type> output(count, context); + vector<T2> values_output(sort_by_key 
? count : 0, context); + vector<uint_> offsets(k2, context); + vector<uint_> counts(block_count * k2, context); + + const buffer *input_buffer = &first.get_buffer(); + uint_ input_offset = static_cast<uint_>(first.get_index()); + const buffer *output_buffer = &output.get_buffer(); + uint_ output_offset = 0; + const buffer *values_input_buffer = &values_first.get_buffer(); + uint_ values_input_offset = static_cast<uint_>(values_first.get_index()); + const buffer *values_output_buffer = &values_output.get_buffer(); + uint_ values_output_offset = 0; + + for(uint_ i = 0; i < sizeof(sort_type) * CHAR_BIT / k; i++){ + // write counts + count_kernel.set_arg(0, *input_buffer); + count_kernel.set_arg(1, input_offset); + count_kernel.set_arg(2, static_cast<uint_>(count)); + count_kernel.set_arg(3, counts); + count_kernel.set_arg(4, offsets); + count_kernel.set_arg(5, block_size * sizeof(uint_), 0); + count_kernel.set_arg(6, i * k); + queue.enqueue_1d_range_kernel(count_kernel, + 0, + block_count * block_size, + block_size); + + // scan counts + if(k == 1){ + typedef uint2_ counter_type; + ::boost::compute::exclusive_scan( + make_buffer_iterator<counter_type>(counts.get_buffer(), 0), + make_buffer_iterator<counter_type>(counts.get_buffer(), counts.size() / 2), + make_buffer_iterator<counter_type>(counts.get_buffer()), + queue + ); + } + else if(k == 2){ + typedef uint4_ counter_type; + ::boost::compute::exclusive_scan( + make_buffer_iterator<counter_type>(counts.get_buffer(), 0), + make_buffer_iterator<counter_type>(counts.get_buffer(), counts.size() / 4), + make_buffer_iterator<counter_type>(counts.get_buffer()), + queue + ); + } + else if(k == 4){ + typedef uint16_ counter_type; + ::boost::compute::exclusive_scan( + make_buffer_iterator<counter_type>(counts.get_buffer(), 0), + make_buffer_iterator<counter_type>(counts.get_buffer(), counts.size() / 16), + make_buffer_iterator<counter_type>(counts.get_buffer()), + queue + ); + } + else { + BOOST_ASSERT(false && "unknown k"); 
+ break; + } + + // scan global offsets + scan_kernel.set_arg(0, counts); + scan_kernel.set_arg(1, offsets); + scan_kernel.set_arg(2, block_count); + queue.enqueue_task(scan_kernel); + + // scatter values + scatter_kernel.set_arg(0, *input_buffer); + scatter_kernel.set_arg(1, input_offset); + scatter_kernel.set_arg(2, static_cast<uint_>(count)); + scatter_kernel.set_arg(3, i * k); + scatter_kernel.set_arg(4, counts); + scatter_kernel.set_arg(5, offsets); + scatter_kernel.set_arg(6, *output_buffer); + scatter_kernel.set_arg(7, output_offset); + if(sort_by_key){ + scatter_kernel.set_arg(8, *values_input_buffer); + scatter_kernel.set_arg(9, values_input_offset); + scatter_kernel.set_arg(10, *values_output_buffer); + scatter_kernel.set_arg(11, values_output_offset); + } + queue.enqueue_1d_range_kernel(scatter_kernel, + 0, + block_count * block_size, + block_size); + + // swap buffers + std::swap(input_buffer, output_buffer); + std::swap(values_input_buffer, values_output_buffer); + std::swap(input_offset, output_offset); + std::swap(values_input_offset, values_output_offset); + } +} + +template<class Iterator> +inline void radix_sort(Iterator first, + Iterator last, + command_queue &queue) +{ + radix_sort_impl(first, last, buffer_iterator<int>(), queue); +} + +template<class KeyIterator, class ValueIterator> +inline void radix_sort_by_key(KeyIterator keys_first, + KeyIterator keys_last, + ValueIterator values_first, + command_queue &queue) +{ + radix_sort_impl(keys_first, keys_last, values_first, queue); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_RADIX_SORT_HPP diff --git a/boost/compute/algorithm/detail/random_fill.hpp b/boost/compute/algorithm/detail/random_fill.hpp new file mode 100644 index 0000000000..5c3827a9f8 --- /dev/null +++ b/boost/compute/algorithm/detail/random_fill.hpp @@ -0,0 +1,57 @@ +//---------------------------------------------------------------------------// +// 
Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_RANDOM_FILL_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_RANDOM_FILL_HPP + +#include <iterator> + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/random/default_random_engine.hpp> +#include <boost/compute/random/uniform_real_distribution.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class OutputIterator, class Generator> +inline void random_fill(OutputIterator first, + OutputIterator last, + Generator &g, + command_queue &queue) +{ + g.fill(first, last, queue); +} + +template<class OutputIterator> +inline void +random_fill(OutputIterator first, + OutputIterator last, + typename std::iterator_traits<OutputIterator>::value_type lo, + typename std::iterator_traits<OutputIterator>::value_type hi, + command_queue &queue) +{ + typedef typename + std::iterator_traits<OutputIterator>::value_type value_type; + typedef typename + boost::compute::default_random_engine engine_type; + typedef typename + boost::compute::uniform_real_distribution<value_type> distribution_type; + + engine_type engine(queue); + distribution_type generator(lo, hi); + generator.fill(first, last, engine, queue); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_RANDOM_FILL_HPP diff --git a/boost/compute/algorithm/detail/reduce_by_key.hpp b/boost/compute/algorithm/detail/reduce_by_key.hpp new file mode 100644 index 0000000000..65844c9ebf --- /dev/null +++ b/boost/compute/algorithm/detail/reduce_by_key.hpp @@ -0,0 +1,119 @@ 
+//---------------------------------------------------------------------------// +// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_BY_KEY_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_BY_KEY_HPP + +#include <algorithm> +#include <iterator> + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/functional.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/algorithm/detail/serial_reduce_by_key.hpp> +#include <boost/compute/algorithm/detail/reduce_by_key_with_scan.hpp> +#include <boost/compute/type_traits.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class InputKeyIterator, class InputValueIterator, + class OutputKeyIterator, class OutputValueIterator, + class BinaryFunction, class BinaryPredicate> +size_t reduce_by_key_on_gpu(InputKeyIterator keys_first, + InputKeyIterator keys_last, + InputValueIterator values_first, + OutputKeyIterator keys_result, + OutputValueIterator values_result, + BinaryFunction function, + BinaryPredicate predicate, + command_queue &queue) +{ + return detail::reduce_by_key_with_scan(keys_first, keys_last, values_first, + keys_result, values_result, function, + predicate, queue); +} + +template<class InputKeyIterator, class InputValueIterator, + class OutputKeyIterator, class OutputValueIterator> +bool reduce_by_key_on_gpu_requirements_met(InputKeyIterator keys_first, + InputValueIterator values_first, + OutputKeyIterator keys_result, + OutputValueIterator values_result, + const size_t count, + command_queue &queue) +{ + const device &device = 
queue.get_device(); + return (count > 256) + && !(device.type() & device::cpu) + && reduce_by_key_with_scan_requirements_met(keys_first, values_first, + keys_result,values_result, + count, queue); + return true; +} + +template<class InputKeyIterator, class InputValueIterator, + class OutputKeyIterator, class OutputValueIterator, + class BinaryFunction, class BinaryPredicate> +inline std::pair<OutputKeyIterator, OutputValueIterator> +dispatch_reduce_by_key(InputKeyIterator keys_first, + InputKeyIterator keys_last, + InputValueIterator values_first, + OutputKeyIterator keys_result, + OutputValueIterator values_result, + BinaryFunction function, + BinaryPredicate predicate, + command_queue &queue) +{ + typedef typename + std::iterator_traits<OutputKeyIterator>::difference_type key_difference_type; + typedef typename + std::iterator_traits<OutputValueIterator>::difference_type value_difference_type; + + const size_t count = detail::iterator_range_size(keys_first, keys_last); + if (count < 2) { + boost::compute::copy_n(keys_first, count, keys_result, queue); + boost::compute::copy_n(values_first, count, values_result, queue); + return + std::make_pair<OutputKeyIterator, OutputValueIterator>( + keys_result + static_cast<key_difference_type>(count), + values_result + static_cast<value_difference_type>(count) + ); + } + + size_t result_size = 0; + if(reduce_by_key_on_gpu_requirements_met(keys_first, values_first, keys_result, + values_result, count, queue)){ + result_size = + detail::reduce_by_key_on_gpu(keys_first, keys_last, values_first, + keys_result, values_result, function, + predicate, queue); + } + else { + result_size = + detail::serial_reduce_by_key(keys_first, keys_last, values_first, + keys_result, values_result, function, + predicate, queue); + } + + return + std::make_pair<OutputKeyIterator, OutputValueIterator>( + keys_result + static_cast<key_difference_type>(result_size), + values_result + static_cast<value_difference_type>(result_size) + ); +} + +} // end 
detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_BY_KEY_HPP diff --git a/boost/compute/algorithm/detail/reduce_by_key_with_scan.hpp b/boost/compute/algorithm/detail/reduce_by_key_with_scan.hpp new file mode 100644 index 0000000000..e6852a67eb --- /dev/null +++ b/boost/compute/algorithm/detail/reduce_by_key_with_scan.hpp @@ -0,0 +1,541 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_BY_KEY_WITH_SCAN_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_BY_KEY_WITH_SCAN_HPP + +#include <algorithm> +#include <iterator> + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/functional.hpp> +#include <boost/compute/algorithm/inclusive_scan.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/container/detail/scalar.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/detail/read_write_single_value.hpp> +#include <boost/compute/type_traits.hpp> +#include <boost/compute/utility/program_cache.hpp> + +namespace boost { +namespace compute { +namespace detail { + +/// \internal_ +/// +/// Fills \p new_keys_first with unsigned integer keys generated from vector +/// of original keys \p keys_first. New keys can be distinguish by simple equality +/// predicate. 
+/// +/// \param keys_first iterator pointing to the first key +/// \param number_of_keys number of keys +/// \param predicate binary predicate for key comparison +/// \param new_keys_first iterator pointing to the new keys vector +/// \param preferred_work_group_size preferred work group size +/// \param queue command queue to perform the operation +/// +/// Binary function \p predicate must take two keys as arguments and +/// return true only if they are considered the same. +/// +/// The first new key equals zero and the last equals number of unique keys +/// minus one. +/// +/// No local memory usage. +template<class InputKeyIterator, class BinaryPredicate> +inline void generate_uint_keys(InputKeyIterator keys_first, + size_t number_of_keys, + BinaryPredicate predicate, + vector<uint_>::iterator new_keys_first, + size_t preferred_work_group_size, + command_queue &queue) +{ + typedef typename + std::iterator_traits<InputKeyIterator>::value_type key_type; + + detail::meta_kernel k("reduce_by_key_new_key_flags"); + k.add_set_arg<const uint_>("count", uint_(number_of_keys)); + + k << + k.decl<const uint_>("gid") << " = get_global_id(0);\n" << + k.decl<uint_>("value") << " = 0;\n" << + "if(gid >= count){\n return;\n}\n" << + "if(gid > 0){ \n" << + k.decl<key_type>("key") << " = " << + keys_first[k.var<const uint_>("gid")] << ";\n" << + k.decl<key_type>("previous_key") << " = " << + keys_first[k.var<const uint_>("gid - 1")] << ";\n" << + " value = " << predicate(k.var<key_type>("previous_key"), + k.var<key_type>("key")) << + " ? 
0 : 1;\n" << + "}\n else {\n" << + " value = 0;\n" << + "}\n" << + new_keys_first[k.var<const uint_>("gid")] << " = value;\n"; + + const context &context = queue.get_context(); + kernel kernel = k.compile(context); + + size_t work_group_size = preferred_work_group_size; + size_t work_groups_no = static_cast<size_t>( + std::ceil(float(number_of_keys) / work_group_size) + ); + + queue.enqueue_1d_range_kernel(kernel, + 0, + work_groups_no * work_group_size, + work_group_size); + + inclusive_scan(new_keys_first, new_keys_first + number_of_keys, + new_keys_first, queue); +} + +/// \internal_ +/// Calculate carry-out for each work group. +/// Carry-out is a pair of the last key processed by a work group and sum of all +/// values under this key in this work group. +template<class InputValueIterator, class OutputValueIterator, class BinaryFunction> +inline void carry_outs(vector<uint_>::iterator keys_first, + InputValueIterator values_first, + size_t count, + vector<uint_>::iterator carry_out_keys_first, + OutputValueIterator carry_out_values_first, + BinaryFunction function, + size_t work_group_size, + command_queue &queue) +{ + typedef typename + std::iterator_traits<OutputValueIterator>::value_type value_out_type; + + detail::meta_kernel k("reduce_by_key_with_scan_carry_outs"); + k.add_set_arg<const uint_>("count", uint_(count)); + size_t local_keys_arg = k.add_arg<uint_ *>(memory_object::local_memory, "lkeys"); + size_t local_vals_arg = k.add_arg<value_out_type *>(memory_object::local_memory, "lvals"); + + k << + k.decl<const uint_>("gid") << " = get_global_id(0);\n" << + k.decl<const uint_>("wg_size") << " = get_local_size(0);\n" << + k.decl<const uint_>("lid") << " = get_local_id(0);\n" << + k.decl<const uint_>("group_id") << " = get_group_id(0);\n" << + + k.decl<uint_>("key") << ";\n" << + k.decl<value_out_type>("value") << ";\n" << + "if(gid < count){\n" << + k.var<uint_>("key") << " = " << + keys_first[k.var<const uint_>("gid")] << ";\n" << + 
k.var<value_out_type>("value") << " = " << + values_first[k.var<const uint_>("gid")] << ";\n" << + "lkeys[lid] = key;\n" << + "lvals[lid] = value;\n" << + "}\n" << + + // Calculate carry out for each work group by performing Hillis/Steele scan + // where only last element (key-value pair) is saved + k.decl<value_out_type>("result") << " = value;\n" << + k.decl<uint_>("other_key") << ";\n" << + k.decl<value_out_type>("other_value") << ";\n" << + + "for(" << k.decl<uint_>("offset") << " = 1; " << + "offset < wg_size; offset *= 2){\n" + " barrier(CLK_LOCAL_MEM_FENCE);\n" << + " if(lid >= offset){\n" + " other_key = lkeys[lid - offset];\n" << + " if(other_key == key){\n" << + " other_value = lvals[lid - offset];\n" << + " result = " << function(k.var<value_out_type>("result"), + k.var<value_out_type>("other_value")) << ";\n" << + " }\n" << + " }\n" << + " barrier(CLK_LOCAL_MEM_FENCE);\n" << + " lvals[lid] = result;\n" << + "}\n" << + + // save carry out + "if(lid == (wg_size - 1)){\n" << + carry_out_keys_first[k.var<const uint_>("group_id")] << " = key;\n" << + carry_out_values_first[k.var<const uint_>("group_id")] << " = result;\n" << + "}\n"; + + size_t work_groups_no = static_cast<size_t>( + std::ceil(float(count) / work_group_size) + ); + + const context &context = queue.get_context(); + kernel kernel = k.compile(context); + kernel.set_arg(local_keys_arg, local_buffer<uint_>(work_group_size)); + kernel.set_arg(local_vals_arg, local_buffer<value_out_type>(work_group_size)); + + queue.enqueue_1d_range_kernel(kernel, + 0, + work_groups_no * work_group_size, + work_group_size); +} + +/// \internal_ +/// Calculate carry-in by performing inclusive scan by key on carry-outs vector. 
+template<class OutputValueIterator, class BinaryFunction> +inline void carry_ins(vector<uint_>::iterator carry_out_keys_first, + OutputValueIterator carry_out_values_first, + OutputValueIterator carry_in_values_first, + size_t carry_out_size, + BinaryFunction function, + size_t work_group_size, + command_queue &queue) +{ + typedef typename + std::iterator_traits<OutputValueIterator>::value_type value_out_type; + + uint_ values_pre_work_item = static_cast<uint_>( + std::ceil(float(carry_out_size) / work_group_size) + ); + + detail::meta_kernel k("reduce_by_key_with_scan_carry_ins"); + k.add_set_arg<const uint_>("carry_out_size", uint_(carry_out_size)); + k.add_set_arg<const uint_>("values_per_work_item", values_pre_work_item); + size_t local_keys_arg = k.add_arg<uint_ *>(memory_object::local_memory, "lkeys"); + size_t local_vals_arg = k.add_arg<value_out_type *>(memory_object::local_memory, "lvals"); + + k << + k.decl<uint_>("id") << " = get_global_id(0) * values_per_work_item;\n" << + k.decl<uint_>("idx") << " = id;\n" << + k.decl<const uint_>("wg_size") << " = get_local_size(0);\n" << + k.decl<const uint_>("lid") << " = get_local_id(0);\n" << + k.decl<const uint_>("group_id") << " = get_group_id(0);\n" << + + k.decl<uint_>("key") << ";\n" << + k.decl<value_out_type>("value") << ";\n" << + k.decl<uint_>("previous_key") << ";\n" << + k.decl<value_out_type>("result") << ";\n" << + + "if(id < carry_out_size){\n" << + k.var<uint_>("previous_key") << " = " << + carry_out_keys_first[k.var<const uint_>("id")] << ";\n" << + k.var<value_out_type>("result") << " = " << + carry_out_values_first[k.var<const uint_>("id")] << ";\n" << + carry_in_values_first[k.var<const uint_>("id")] << " = result;\n" << + "}\n" << + + k.decl<const uint_>("end") << " = (id + values_per_work_item) <= carry_out_size" << + " ? 
(values_per_work_item + id) : carry_out_size;\n" << + + "for(idx = idx + 1; idx < end; idx += 1){\n" << + " key = " << carry_out_keys_first[k.var<const uint_>("idx")] << ";\n" << + " value = " << carry_out_values_first[k.var<const uint_>("idx")] << ";\n" << + " if(previous_key == key){\n" << + " result = " << function(k.var<value_out_type>("result"), + k.var<value_out_type>("value")) << ";\n" << + " }\n else { \n" << + " result = value;\n" + " }\n" << + " " << carry_in_values_first[k.var<const uint_>("idx")] << " = result;\n" << + " previous_key = key;\n" + "}\n" << + + // save the last key and result to local memory + "lkeys[lid] = previous_key;\n" << + "lvals[lid] = result;\n" << + + // Hillis/Steele scan + "for(" << k.decl<uint_>("offset") << " = 1; " << + "offset < wg_size; offset *= 2){\n" + " barrier(CLK_LOCAL_MEM_FENCE);\n" << + " if(lid >= offset){\n" + " key = lkeys[lid - offset];\n" << + " if(previous_key == key){\n" << + " value = lvals[lid - offset];\n" << + " result = " << function(k.var<value_out_type>("result"), + k.var<value_out_type>("value")) << ";\n" << + " }\n" << + " }\n" << + " barrier(CLK_LOCAL_MEM_FENCE);\n" << + " lvals[lid] = result;\n" << + "}\n" << + "barrier(CLK_LOCAL_MEM_FENCE);\n" << + + "if(lid > 0){\n" << + // load key-value reduced by previous work item + " previous_key = lkeys[lid - 1];\n" << + " result = lvals[lid - 1];\n" << + "}\n" << + + // add key-value reduced by previous work item + "for(idx = id; idx < id + values_per_work_item; idx += 1){\n" << + // make sure all carry-ins are saved in global memory + " barrier( CLK_GLOBAL_MEM_FENCE );\n" << + " if(lid > 0 && idx < carry_out_size) {\n" + " key = " << carry_out_keys_first[k.var<const uint_>("idx")] << ";\n" << + " value = " << carry_in_values_first[k.var<const uint_>("idx")] << ";\n" << + " if(previous_key == key){\n" << + " value = " << function(k.var<value_out_type>("result"), + k.var<value_out_type>("value")) << ";\n" << + " }\n" << + " " << 
carry_in_values_first[k.var<const uint_>("idx")] << " = value;\n" << + " }\n" << + "}\n"; + + + const context &context = queue.get_context(); + kernel kernel = k.compile(context); + kernel.set_arg(local_keys_arg, local_buffer<uint_>(work_group_size)); + kernel.set_arg(local_vals_arg, local_buffer<value_out_type>(work_group_size)); + + queue.enqueue_1d_range_kernel(kernel, + 0, + work_group_size, + work_group_size); +} + +/// \internal_ +/// +/// Perform final reduction by key. Each work item: +/// 1. Perform local work-group reduction (Hillis/Steele scan) +/// 2. Add carry-in (if keys are right) +/// 3. Save reduced value if next key is different than processed one +template<class InputKeyIterator, class InputValueIterator, + class OutputKeyIterator, class OutputValueIterator, + class BinaryFunction> +inline void final_reduction(InputKeyIterator keys_first, + InputValueIterator values_first, + OutputKeyIterator keys_result, + OutputValueIterator values_result, + size_t count, + BinaryFunction function, + vector<uint_>::iterator new_keys_first, + vector<uint_>::iterator carry_in_keys_first, + OutputValueIterator carry_in_values_first, + size_t carry_in_size, + size_t work_group_size, + command_queue &queue) +{ + typedef typename + std::iterator_traits<OutputValueIterator>::value_type value_out_type; + + detail::meta_kernel k("reduce_by_key_with_scan_final_reduction"); + k.add_set_arg<const uint_>("count", uint_(count)); + size_t local_keys_arg = k.add_arg<uint_ *>(memory_object::local_memory, "lkeys"); + size_t local_vals_arg = k.add_arg<value_out_type *>(memory_object::local_memory, "lvals"); + + k << + k.decl<const uint_>("gid") << " = get_global_id(0);\n" << + k.decl<const uint_>("wg_size") << " = get_local_size(0);\n" << + k.decl<const uint_>("lid") << " = get_local_id(0);\n" << + k.decl<const uint_>("group_id") << " = get_group_id(0);\n" << + + k.decl<uint_>("key") << ";\n" << + k.decl<value_out_type>("value") << ";\n" + + "if(gid < count){\n" << + 
k.var<uint_>("key") << " = " << + new_keys_first[k.var<const uint_>("gid")] << ";\n" << + k.var<value_out_type>("value") << " = " << + values_first[k.var<const uint_>("gid")] << ";\n" << + "lkeys[lid] = key;\n" << + "lvals[lid] = value;\n" << + "}\n" << + + // Hillis/Steele scan + k.decl<value_out_type>("result") << " = value;\n" << + k.decl<uint_>("other_key") << ";\n" << + k.decl<value_out_type>("other_value") << ";\n" << + + "for(" << k.decl<uint_>("offset") << " = 1; " << + "offset < wg_size ; offset *= 2){\n" + " barrier(CLK_LOCAL_MEM_FENCE);\n" << + " if(lid >= offset) {\n" << + " other_key = lkeys[lid - offset];\n" << + " if(other_key == key){\n" << + " other_value = lvals[lid - offset];\n" << + " result = " << function(k.var<value_out_type>("result"), + k.var<value_out_type>("other_value")) << ";\n" << + " }\n" << + " }\n" << + " barrier(CLK_LOCAL_MEM_FENCE);\n" << + " lvals[lid] = result;\n" << + "}\n" << + + "if(gid >= count) {\n return;\n};\n" << + + k.decl<const bool>("save") << " = (gid < (count - 1)) ?" + << new_keys_first[k.var<const uint_>("gid + 1")] << " != key" << + ": true;\n" << + + // Add carry in + k.decl<uint_>("carry_in_key") << ";\n" << + "if(group_id > 0 && save) {\n" << + " carry_in_key = " << carry_in_keys_first[k.var<const uint_>("group_id - 1")] << ";\n" << + " if(key == carry_in_key){\n" << + " other_value = " << carry_in_values_first[k.var<const uint_>("group_id - 1")] << ";\n" << + " result = " << function(k.var<value_out_type>("result"), + k.var<value_out_type>("other_value")) << ";\n" << + " }\n" << + "}\n" << + + // Save result only if the next key is different or it's the last element. 
+ "if(save){\n" << + keys_result[k.var<uint_>("key")] << " = " << keys_first[k.var<const uint_>("gid")] << ";\n" << + values_result[k.var<uint_>("key")] << " = result;\n" << + "}\n" + ; + + size_t work_groups_no = static_cast<size_t>( + std::ceil(float(count) / work_group_size) + ); + + const context &context = queue.get_context(); + kernel kernel = k.compile(context); + kernel.set_arg(local_keys_arg, local_buffer<uint_>(work_group_size)); + kernel.set_arg(local_vals_arg, local_buffer<value_out_type>(work_group_size)); + + queue.enqueue_1d_range_kernel(kernel, + 0, + work_groups_no * work_group_size, + work_group_size); +} + +/// \internal_ +/// Returns preferred work group size for reduce by key with scan algorithm. +template<class KeyType, class ValueType> +inline size_t get_work_group_size(const device& device) +{ + std::string cache_key = std::string("__boost_reduce_by_key_with_scan") + + "k_" + type_name<KeyType>() + "_v_" + type_name<ValueType>(); + + // load parameters + boost::shared_ptr<parameter_cache> parameters = + detail::parameter_cache::get_global_cache(device); + + return (std::max)( + static_cast<size_t>(parameters->get(cache_key, "wgsize", 256)), + static_cast<size_t>(device.get_info<CL_DEVICE_MAX_WORK_GROUP_SIZE>()) + ); +} + +/// \internal_ +/// +/// 1. For each work group carry-out value is calculated (it's done by key-oriented +/// Hillis/Steele scan). Carry-out is a pair of the last key processed by work +/// group and sum of all values under this key in work group. +/// 2. From every carry-out carry-in is calculated by performing inclusive scan +/// by key. +/// 3. Final reduction by key is performed (key-oriented Hillis/Steele scan), +/// carry-in values are added where needed. 
+template<class InputKeyIterator, class InputValueIterator, + class OutputKeyIterator, class OutputValueIterator, + class BinaryFunction, class BinaryPredicate> +inline size_t reduce_by_key_with_scan(InputKeyIterator keys_first, + InputKeyIterator keys_last, + InputValueIterator values_first, + OutputKeyIterator keys_result, + OutputValueIterator values_result, + BinaryFunction function, + BinaryPredicate predicate, + command_queue &queue) +{ + typedef typename + std::iterator_traits<InputValueIterator>::value_type value_type; + typedef typename + std::iterator_traits<InputKeyIterator>::value_type key_type; + typedef typename + std::iterator_traits<OutputValueIterator>::value_type value_out_type; + + const context &context = queue.get_context(); + size_t count = detail::iterator_range_size(keys_first, keys_last); + + if(count == 0){ + return size_t(0); + } + + const device &device = queue.get_device(); + size_t work_group_size = get_work_group_size<value_type, key_type>(device); + + // Replace original key with unsigned integer keys generated based on given + // predicate. New key is also an index for keys_result and values_result vectors, + // which points to place where reduced value should be saved. 
+ vector<uint_> new_keys(count, context); + vector<uint_>::iterator new_keys_first = new_keys.begin(); + generate_uint_keys(keys_first, count, predicate, new_keys_first, + work_group_size, queue); + + // Calculate carry-out and carry-in vectors size + const size_t carry_out_size = static_cast<size_t>( + std::ceil(float(count) / work_group_size) + ); + vector<uint_> carry_out_keys(carry_out_size, context); + vector<value_out_type> carry_out_values(carry_out_size, context); + carry_outs(new_keys_first, values_first, count, carry_out_keys.begin(), + carry_out_values.begin(), function, work_group_size, queue); + + vector<value_out_type> carry_in_values(carry_out_size, context); + carry_ins(carry_out_keys.begin(), carry_out_values.begin(), + carry_in_values.begin(), carry_out_size, function, work_group_size, + queue); + + final_reduction(keys_first, values_first, keys_result, values_result, + count, function, new_keys_first, carry_out_keys.begin(), + carry_in_values.begin(), carry_out_size, work_group_size, + queue); + + const size_t result = read_single_value<uint_>(new_keys.get_buffer(), + count - 1, queue); + return result + 1; +} + +/// \internal_ +/// Return true if requirements for running reduce by key with scan on given +/// device are met (at least one work group of preferred size can be run). 
+template<class InputKeyIterator, class InputValueIterator, + class OutputKeyIterator, class OutputValueIterator> +bool reduce_by_key_with_scan_requirements_met(InputKeyIterator keys_first, + InputValueIterator values_first, + OutputKeyIterator keys_result, + OutputValueIterator values_result, + const size_t count, + command_queue &queue) +{ + typedef typename + std::iterator_traits<InputValueIterator>::value_type value_type; + typedef typename + std::iterator_traits<InputKeyIterator>::value_type key_type; + typedef typename + std::iterator_traits<OutputValueIterator>::value_type value_out_type; + + (void) keys_first; + (void) values_first; + (void) keys_result; + (void) values_result; + + const device &device = queue.get_device(); + // device must have dedicated local memory storage + if(device.get_info<CL_DEVICE_LOCAL_MEM_TYPE>() != CL_LOCAL) + { + return false; + } + + // local memory size in bytes (per compute unit) + const size_t local_mem_size = device.get_info<CL_DEVICE_LOCAL_MEM_SIZE>(); + + // preferred work group size + size_t work_group_size = get_work_group_size<key_type, value_type>(device); + + // local memory size needed to perform parallel reduction + size_t required_local_mem_size = 0; + // keys size + required_local_mem_size += sizeof(uint_) * work_group_size; + // reduced values size + required_local_mem_size += sizeof(value_out_type) * work_group_size; + + return (required_local_mem_size <= local_mem_size); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_BY_KEY_WITH_SCAN_HPP diff --git a/boost/compute/algorithm/detail/reduce_on_gpu.hpp b/boost/compute/algorithm/detail/reduce_on_gpu.hpp new file mode 100644 index 0000000000..335fba8724 --- /dev/null +++ b/boost/compute/algorithm/detail/reduce_on_gpu.hpp @@ -0,0 +1,286 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> 
+// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_ON_GPU_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_ON_GPU_HPP + +#include <iterator> + +#include <boost/compute/utility/source.hpp> +#include <boost/compute/program.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/detail/vendor.hpp> +#include <boost/compute/detail/parameter_cache.hpp> +#include <boost/compute/detail/work_size.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/type_traits/type_name.hpp> +#include <boost/compute/utility/program_cache.hpp> + +namespace boost { +namespace compute { +namespace detail { + +/// \internal +/// body reduction inside a warp +template<typename T,bool isNvidiaDevice> +struct ReduceBody +{ + static std::string body() + { + std::stringstream k; + // local reduction + k << "for(int i = 1; i < TPB; i <<= 1){\n" << + " barrier(CLK_LOCAL_MEM_FENCE);\n" << + " uint mask = (i << 1) - 1;\n" << + " if((lid & mask) == 0){\n" << + " scratch[lid] += scratch[lid+i];\n" << + " }\n" << + "}\n"; + return k.str(); + } +}; + +/// \internal +/// body reduction inside a warp +/// for nvidia device we can use the "unsafe" +/// memory optimisation +template<typename T> +struct ReduceBody<T,true> +{ + static std::string body() + { + std::stringstream k; + // local reduction + // we use TPB to compile only useful instruction + // local reduction when size is greater than warp size + k << "barrier(CLK_LOCAL_MEM_FENCE);\n" << + "if(TPB >= 1024){\n" << + "if(lid < 512) { sum += scratch[lid + 512]; scratch[lid] = sum;} barrier(CLK_LOCAL_MEM_FENCE);}\n" << + "if(TPB >= 512){\n" << + "if(lid < 256) { sum += scratch[lid + 256]; scratch[lid] = 
sum;} barrier(CLK_LOCAL_MEM_FENCE);}\n" << + "if(TPB >= 256){\n" << + "if(lid < 128) { sum += scratch[lid + 128]; scratch[lid] = sum;} barrier(CLK_LOCAL_MEM_FENCE);}\n" << + "if(TPB >= 128){\n" << + "if(lid < 64) { sum += scratch[lid + 64]; scratch[lid] = sum;} barrier(CLK_LOCAL_MEM_FENCE);} \n" << + + // warp reduction + "if(lid < 32){\n" << + // volatile this way we don't need any barrier + "volatile __local " << type_name<T>() << " *lmem = scratch;\n" << + "if(TPB >= 64) { lmem[lid] = sum = sum + lmem[lid+32];} \n" << + "if(TPB >= 32) { lmem[lid] = sum = sum + lmem[lid+16];} \n" << + "if(TPB >= 16) { lmem[lid] = sum = sum + lmem[lid+ 8];} \n" << + "if(TPB >= 8) { lmem[lid] = sum = sum + lmem[lid+ 4];} \n" << + "if(TPB >= 4) { lmem[lid] = sum = sum + lmem[lid+ 2];} \n" << + "if(TPB >= 2) { lmem[lid] = sum = sum + lmem[lid+ 1];} \n" << + "}\n"; + return k.str(); + } +}; + +template<class InputIterator, class Function> +inline void initial_reduce(InputIterator first, + InputIterator last, + buffer result, + const Function &function, + kernel &reduce_kernel, + const uint_ vpt, + const uint_ tpb, + command_queue &queue) +{ + (void) function; + (void) reduce_kernel; + + typedef typename std::iterator_traits<InputIterator>::value_type Arg; + typedef typename boost::tr1_result_of<Function(Arg, Arg)>::type T; + + size_t count = std::distance(first, last); + detail::meta_kernel k("initial_reduce"); + k.add_set_arg<const uint_>("count", uint_(count)); + size_t output_arg = k.add_arg<T *>(memory_object::global_memory, "output"); + + k << + k.decl<const uint_>("offset") << " = get_group_id(0) * VPT * TPB;\n" << + k.decl<const uint_>("lid") << " = get_local_id(0);\n" << + + "__local " << type_name<T>() << " scratch[TPB];\n" << + + // private reduction + k.decl<T>("sum") << " = 0;\n" << + "for(uint i = 0; i < VPT; i++){\n" << + " if(offset + lid + i*TPB < count){\n" << + " sum = sum + " << first[k.var<uint_>("offset+lid+i*TPB")] << ";\n" << + " }\n" << + "}\n" << + + 
"scratch[lid] = sum;\n" << + + // local reduction + ReduceBody<T,false>::body() << + + // write sum to output + "if(lid == 0){\n" << + " output[get_group_id(0)] = scratch[0];\n" << + "}\n"; + + const context &context = queue.get_context(); + std::stringstream options; + options << "-DVPT=" << vpt << " -DTPB=" << tpb; + kernel generic_reduce_kernel = k.compile(context, options.str()); + generic_reduce_kernel.set_arg(output_arg, result); + + size_t work_size = calculate_work_size(count, vpt, tpb); + + queue.enqueue_1d_range_kernel(generic_reduce_kernel, 0, work_size, tpb); +} + +template<class T> +inline void initial_reduce(const buffer_iterator<T> &first, + const buffer_iterator<T> &last, + const buffer &result, + const plus<T> &function, + kernel &reduce_kernel, + const uint_ vpt, + const uint_ tpb, + command_queue &queue) +{ + (void) function; + + size_t count = std::distance(first, last); + + reduce_kernel.set_arg(0, first.get_buffer()); + reduce_kernel.set_arg(1, uint_(first.get_index())); + reduce_kernel.set_arg(2, uint_(count)); + reduce_kernel.set_arg(3, result); + reduce_kernel.set_arg(4, uint_(0)); + + size_t work_size = calculate_work_size(count, vpt, tpb); + + queue.enqueue_1d_range_kernel(reduce_kernel, 0, work_size, tpb); +} + +template<class InputIterator, class T, class Function> +inline void reduce_on_gpu(InputIterator first, + InputIterator last, + buffer_iterator<T> result, + Function function, + command_queue &queue) +{ + const device &device = queue.get_device(); + const context &context = queue.get_context(); + + detail::meta_kernel k("reduce"); + k.add_arg<const T*>(memory_object::global_memory, "input"); + k.add_arg<const uint_>("offset"); + k.add_arg<const uint_>("count"); + k.add_arg<T*>(memory_object::global_memory, "output"); + k.add_arg<const uint_>("output_offset"); + + k << + k.decl<const uint_>("block_offset") << " = get_group_id(0) * VPT * TPB;\n" << + "__global const " << type_name<T>() << " *block = input + offset + block_offset;\n" 
<< + k.decl<const uint_>("lid") << " = get_local_id(0);\n" << + + "__local " << type_name<T>() << " scratch[TPB];\n" << + // private reduction + k.decl<T>("sum") << " = 0;\n" << + "for(uint i = 0; i < VPT; i++){\n" << + " if(block_offset + lid + i*TPB < count){\n" << + " sum = sum + block[lid+i*TPB]; \n" << + " }\n" << + "}\n" << + + "scratch[lid] = sum;\n"; + + // discrimination on vendor name + if(is_nvidia_device(device)) + k << ReduceBody<T,true>::body(); + else + k << ReduceBody<T,false>::body(); + + k << + // write sum to output + "if(lid == 0){\n" << + " output[output_offset + get_group_id(0)] = scratch[0];\n" << + "}\n"; + + std::string cache_key = std::string("__boost_reduce_on_gpu_") + type_name<T>(); + + // load parameters + boost::shared_ptr<parameter_cache> parameters = + detail::parameter_cache::get_global_cache(device); + + uint_ vpt = parameters->get(cache_key, "vpt", 8); + uint_ tpb = parameters->get(cache_key, "tpb", 128); + + // reduce program compiler flags + std::stringstream options; + options << "-DT=" << type_name<T>() + << " -DVPT=" << vpt + << " -DTPB=" << tpb; + + // load program + boost::shared_ptr<program_cache> cache = + program_cache::get_global_cache(context); + + program reduce_program = cache->get_or_build( + cache_key, options.str(), k.source(), context + ); + + // create reduce kernel + kernel reduce_kernel(reduce_program, "reduce"); + + size_t count = std::distance(first, last); + + // first pass, reduce from input to ping + buffer ping(context, std::ceil(float(count) / vpt / tpb) * sizeof(T)); + initial_reduce(first, last, ping, function, reduce_kernel, vpt, tpb, queue); + + // update count after initial reduce + count = static_cast<size_t>(std::ceil(float(count) / vpt / tpb)); + + // middle pass(es), reduce between ping and pong + const buffer *input_buffer = &ping; + buffer pong(context, static_cast<size_t>(count / vpt / tpb * sizeof(T))); + const buffer *output_buffer = &pong; + if(count > vpt * tpb){ + while(count > vpt * 
tpb){ + reduce_kernel.set_arg(0, *input_buffer); + reduce_kernel.set_arg(1, uint_(0)); + reduce_kernel.set_arg(2, uint_(count)); + reduce_kernel.set_arg(3, *output_buffer); + reduce_kernel.set_arg(4, uint_(0)); + + size_t work_size = static_cast<size_t>(std::ceil(float(count) / vpt)); + if(work_size % tpb != 0){ + work_size += tpb - work_size % tpb; + } + queue.enqueue_1d_range_kernel(reduce_kernel, 0, work_size, tpb); + + std::swap(input_buffer, output_buffer); + count = static_cast<size_t>(std::ceil(float(count) / vpt / tpb)); + } + } + + // final pass, reduce from ping/pong to result + reduce_kernel.set_arg(0, *input_buffer); + reduce_kernel.set_arg(1, uint_(0)); + reduce_kernel.set_arg(2, uint_(count)); + reduce_kernel.set_arg(3, result.get_buffer()); + reduce_kernel.set_arg(4, uint_(result.get_index())); + + queue.enqueue_1d_range_kernel(reduce_kernel, 0, tpb, tpb); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_REDUCE_ON_GPU_HPP diff --git a/boost/compute/algorithm/detail/scan.hpp b/boost/compute/algorithm/detail/scan.hpp new file mode 100644 index 0000000000..154b6001be --- /dev/null +++ b/boost/compute/algorithm/detail/scan.hpp @@ -0,0 +1,45 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_HPP + +#include <boost/compute/device.hpp> +#include <boost/compute/algorithm/detail/scan_on_cpu.hpp> +#include <boost/compute/algorithm/detail/scan_on_gpu.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class InputIterator, class OutputIterator, class T, class BinaryOperator> +inline OutputIterator scan(InputIterator first, + InputIterator last, + OutputIterator result, + bool exclusive, + T init, + BinaryOperator op, + command_queue &queue) +{ + const device &device = queue.get_device(); + + if(device.type() & device::cpu){ + return scan_on_cpu(first, last, result, exclusive, init, op, queue); + } + else { + return scan_on_gpu(first, last, result, exclusive, init, op, queue); + } +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_HPP diff --git a/boost/compute/algorithm/detail/scan_on_cpu.hpp b/boost/compute/algorithm/detail/scan_on_cpu.hpp new file mode 100644 index 0000000000..6611c0ba3e --- /dev/null +++ b/boost/compute/algorithm/detail/scan_on_cpu.hpp @@ -0,0 +1,103 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_ON_CPU_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_ON_CPU_HPP + +#include <iterator> + +#include <boost/compute/device.hpp> +#include <boost/compute/kernel.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class InputIterator, class OutputIterator, class T, class BinaryOperator> +inline OutputIterator scan_on_cpu(InputIterator first, + InputIterator last, + OutputIterator result, + bool exclusive, + T init, + BinaryOperator op, + command_queue &queue) +{ + if(first == last){ + return result; + } + + typedef typename + std::iterator_traits<InputIterator>::value_type input_type; + typedef typename + std::iterator_traits<OutputIterator>::value_type output_type; + + const context &context = queue.get_context(); + + // create scan kernel + meta_kernel k("scan_on_cpu"); + + // Arguments + size_t n_arg = k.add_arg<ulong_>("n"); + size_t init_arg = k.add_arg<output_type>("initial_value"); + + if(!exclusive){ + k << + k.decl<const ulong_>("start_idx") << " = 1;\n" << + k.decl<output_type>("sum") << " = " << first[0] << ";\n" << + result[0] << " = sum;\n"; + } + else { + k << + k.decl<const ulong_>("start_idx") << " = 0;\n" << + k.decl<output_type>("sum") << " = initial_value;\n"; + } + + k << + "for(ulong i = start_idx; i < n; i++){\n" << + k.decl<const input_type>("x") << " = " + << first[k.var<ulong_>("i")] << ";\n"; + + if(exclusive){ + k << result[k.var<ulong_>("i")] << " = sum;\n"; + } + + k << " sum = " + << op(k.var<output_type>("sum"), k.var<output_type>("x")) + << ";\n"; + + if(!exclusive){ + k << result[k.var<ulong_>("i")] << " = sum;\n"; + } + + k << "}\n"; + + // compile scan kernel + kernel scan_kernel = k.compile(context); + + // setup kernel arguments + 
size_t n = detail::iterator_range_size(first, last); + scan_kernel.set_arg<ulong_>(n_arg, n); + scan_kernel.set_arg<output_type>(init_arg, static_cast<output_type>(init)); + + // execute the kernel + queue.enqueue_1d_range_kernel(scan_kernel, 0, 1, 1); + + // return iterator pointing to the end of the result range + return result + n; +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_ON_CPU_HPP diff --git a/boost/compute/algorithm/detail/scan_on_gpu.hpp b/boost/compute/algorithm/detail/scan_on_gpu.hpp new file mode 100644 index 0000000000..07c6d6d3c0 --- /dev/null +++ b/boost/compute/algorithm/detail/scan_on_gpu.hpp @@ -0,0 +1,331 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_ON_GPU_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_ON_GPU_HPP + +#include <boost/compute/kernel.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/detail/scan_on_cpu.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/memory/local_buffer.hpp> +#include <boost/compute/iterator/buffer_iterator.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class InputIterator, class OutputIterator, class BinaryOperator> +class local_scan_kernel : public meta_kernel +{ +public: + local_scan_kernel(InputIterator first, + InputIterator last, + OutputIterator result, + bool exclusive, + BinaryOperator op) + : meta_kernel("local_scan") + { + typedef typename std::iterator_traits<InputIterator>::value_type T; + + (void) last; + + bool checked = true; + + m_block_sums_arg = add_arg<T *>(memory_object::global_memory, "block_sums"); + m_scratch_arg = add_arg<T *>(memory_object::local_memory, "scratch"); + m_block_size_arg = add_arg<const cl_uint>("block_size"); + m_count_arg = add_arg<const cl_uint>("count"); + m_init_value_arg = add_arg<const T>("init"); + + // work-item parameters + *this << + "const uint gid = get_global_id(0);\n" << + "const uint lid = get_local_id(0);\n"; + + // check against data size + if(checked){ + *this << + "if(gid < count){\n"; + } + + // copy values from input to local memory + if(exclusive){ + *this << + decl<const T>("local_init") << "= (gid == 0) ? 
init : 0;\n" << + "if(lid == 0){ scratch[lid] = local_init; }\n" << + "else { scratch[lid] = " << first[expr<cl_uint>("gid-1")] << "; }\n"; + } + else{ + *this << + "scratch[lid] = " << first[expr<cl_uint>("gid")] << ";\n"; + } + + if(checked){ + *this << + "}\n" + "else {\n" << + " scratch[lid] = 0;\n" << + "}\n"; + } + + // wait for all threads to read from input + *this << + "barrier(CLK_LOCAL_MEM_FENCE);\n"; + + // perform scan + *this << + "for(uint i = 1; i < block_size; i <<= 1){\n" << + " " << decl<const T>("x") << " = lid >= i ? scratch[lid-i] : 0;\n" << + " barrier(CLK_LOCAL_MEM_FENCE);\n" << + " if(lid >= i){\n" << + " scratch[lid] = " << op(var<T>("scratch[lid]"), var<T>("x")) << ";\n" << + " }\n" << + " barrier(CLK_LOCAL_MEM_FENCE);\n" << + "}\n"; + + // copy results to output + if(checked){ + *this << + "if(gid < count){\n"; + } + + *this << + result[expr<cl_uint>("gid")] << " = scratch[lid];\n"; + + if(checked){ + *this << "}\n"; + } + + // store sum for the block + if(exclusive){ + *this << + "if(lid == block_size - 1){\n" << + " block_sums[get_group_id(0)] = " << + op(first[expr<cl_uint>("gid")], var<T>("scratch[lid]")) << + ";\n" << + "}\n"; + } + else { + *this << + "if(lid == block_size - 1){\n" << + " block_sums[get_group_id(0)] = scratch[lid];\n" << + "}\n"; + } + } + + size_t m_block_sums_arg; + size_t m_scratch_arg; + size_t m_block_size_arg; + size_t m_count_arg; + size_t m_init_value_arg; +}; + +template<class T, class BinaryOperator> +class write_scanned_output_kernel : public meta_kernel +{ +public: + write_scanned_output_kernel(BinaryOperator op) + : meta_kernel("write_scanned_output") + { + bool checked = true; + + m_output_arg = add_arg<T *>(memory_object::global_memory, "output"); + m_block_sums_arg = add_arg<const T *>(memory_object::global_memory, "block_sums"); + m_count_arg = add_arg<const cl_uint>("count"); + + // work-item parameters + *this << + "const uint gid = get_global_id(0);\n" << + "const uint block_id = 
get_group_id(0);\n"; + + // check against data size + if(checked){ + *this << "if(gid < count){\n"; + } + + // write output + *this << + "output[gid] = " << + op(var<T>("block_sums[block_id]"), var<T>("output[gid] ")) << ";\n"; + + if(checked){ + *this << "}\n"; + } + } + + size_t m_output_arg; + size_t m_block_sums_arg; + size_t m_count_arg; +}; + +template<class InputIterator> +inline size_t pick_scan_block_size(InputIterator first, InputIterator last) +{ + size_t count = iterator_range_size(first, last); + + if(count == 0) { return 0; } + else if(count <= 1) { return 1; } + else if(count <= 2) { return 2; } + else if(count <= 4) { return 4; } + else if(count <= 8) { return 8; } + else if(count <= 16) { return 16; } + else if(count <= 32) { return 32; } + else if(count <= 64) { return 64; } + else if(count <= 128) { return 128; } + else { return 256; } +} + +template<class InputIterator, class OutputIterator, class T, class BinaryOperator> +inline OutputIterator scan_impl(InputIterator first, + InputIterator last, + OutputIterator result, + bool exclusive, + T init, + BinaryOperator op, + command_queue &queue) +{ + typedef typename + std::iterator_traits<InputIterator>::value_type + input_type; + typedef typename + std::iterator_traits<InputIterator>::difference_type + difference_type; + typedef typename + std::iterator_traits<OutputIterator>::value_type + output_type; + + const context &context = queue.get_context(); + const size_t count = detail::iterator_range_size(first, last); + + size_t block_size = pick_scan_block_size(first, last); + size_t block_count = count / block_size; + + if(block_count * block_size < count){ + block_count++; + } + + ::boost::compute::vector<input_type> block_sums(block_count, context); + + // zero block sums + input_type zero; + std::memset(&zero, 0, sizeof(input_type)); + ::boost::compute::fill(block_sums.begin(), block_sums.end(), zero, queue); + + // local scan + local_scan_kernel<InputIterator, OutputIterator, BinaryOperator> + 
local_scan_kernel(first, last, result, exclusive, op); + + ::boost::compute::kernel kernel = local_scan_kernel.compile(context); + kernel.set_arg(local_scan_kernel.m_scratch_arg, local_buffer<input_type>(block_size)); + kernel.set_arg(local_scan_kernel.m_block_sums_arg, block_sums); + kernel.set_arg(local_scan_kernel.m_block_size_arg, static_cast<cl_uint>(block_size)); + kernel.set_arg(local_scan_kernel.m_count_arg, static_cast<cl_uint>(count)); + kernel.set_arg(local_scan_kernel.m_init_value_arg, static_cast<output_type>(init)); + + queue.enqueue_1d_range_kernel(kernel, + 0, + block_count * block_size, + block_size); + + // inclusive scan block sums + if(block_count > 1){ + scan_impl(block_sums.begin(), + block_sums.end(), + block_sums.begin(), + false, + init, + op, + queue + ); + } + + // add block sums to each block + if(block_count > 1){ + write_scanned_output_kernel<input_type, BinaryOperator> + write_output_kernel(op); + kernel = write_output_kernel.compile(context); + kernel.set_arg(write_output_kernel.m_output_arg, result.get_buffer()); + kernel.set_arg(write_output_kernel.m_block_sums_arg, block_sums); + kernel.set_arg(write_output_kernel.m_count_arg, static_cast<cl_uint>(count)); + + queue.enqueue_1d_range_kernel(kernel, + block_size, + block_count * block_size, + block_size); + } + + return result + static_cast<difference_type>(count); +} + +template<class InputIterator, class OutputIterator, class T, class BinaryOperator> +inline OutputIterator dispatch_scan(InputIterator first, + InputIterator last, + OutputIterator result, + bool exclusive, + T init, + BinaryOperator op, + command_queue &queue) +{ + return scan_impl(first, last, result, exclusive, init, op, queue); +} + +template<class InputIterator, class T, class BinaryOperator> +inline InputIterator dispatch_scan(InputIterator first, + InputIterator last, + InputIterator result, + bool exclusive, + T init, + BinaryOperator op, + command_queue &queue) +{ + typedef typename 
std::iterator_traits<InputIterator>::value_type value_type; + + if(first == result){ + // scan input in-place + const context &context = queue.get_context(); + + // make a temporary copy the input + size_t count = iterator_range_size(first, last); + vector<value_type> tmp(count, context); + copy(first, last, tmp.begin(), queue); + + // scan from temporary values + return scan_impl(tmp.begin(), tmp.end(), first, exclusive, init, op, queue); + } + else { + // scan input to output + return scan_impl(first, last, result, exclusive, init, op, queue); + } +} + +template<class InputIterator, class OutputIterator, class T, class BinaryOperator> +inline OutputIterator scan_on_gpu(InputIterator first, + InputIterator last, + OutputIterator result, + bool exclusive, + T init, + BinaryOperator op, + command_queue &queue) +{ + if(first == last){ + return result; + } + + return dispatch_scan(first, last, result, exclusive, init, op, queue); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SCAN_ON_GPU_HPP diff --git a/boost/compute/algorithm/detail/search_all.hpp b/boost/compute/algorithm/detail/search_all.hpp new file mode 100644 index 0000000000..a874bcdebe --- /dev/null +++ b/boost/compute/algorithm/detail/search_all.hpp @@ -0,0 +1,86 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SEARCH_ALL_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_SEARCH_ALL_HPP + +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/lambda.hpp> +#include <boost/compute/system.hpp> + +namespace boost { +namespace compute { +namespace detail { + +/// +/// \brief Search kernel class +/// +/// Subclass of meta_kernel which is capable of performing pattern matching +/// +template<class PatternIterator, class TextIterator, class OutputIterator> +class search_kernel : public meta_kernel +{ +public: + search_kernel() : meta_kernel("search") + {} + + void set_range(PatternIterator p_first, + PatternIterator p_last, + TextIterator t_first, + TextIterator t_last, + OutputIterator result) + { + m_p_count = iterator_range_size(p_first, p_last); + m_p_count_arg = add_arg<uint_>("p_count"); + + m_count = iterator_range_size(t_first, t_last); + m_count = m_count + 1 - m_p_count; + + *this << + "uint i = get_global_id(0);\n" << + "uint i1 = i;\n" << + "uint j;\n" << + "for(j = 0; j<p_count; j++,i++)\n" << + "{\n" << + " if(" << p_first[expr<uint_>("j")] << " != " << + t_first[expr<uint_>("i")] << ")\n" << + " j = p_count + 1;\n" << + "}\n" << + "if(j == p_count)\n" << + result[expr<uint_>("i1")] << " = 1;\n" << + "else\n" << + result[expr<uint_>("i1")] << " = 0;\n"; + } + + event exec(command_queue &queue) + { + if(m_count == 0) { + return event(); + } + + set_arg(m_p_count_arg, uint_(m_p_count)); + + return exec_1d(queue, 0, m_count); + } + +private: + size_t m_p_count; + size_t m_p_count_arg; + size_t m_count; +}; + +} //end detail namespace +} //end compute namespace +} //end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SEARCH_ALL_HPP diff --git 
a/boost/compute/algorithm/detail/serial_accumulate.hpp b/boost/compute/algorithm/detail/serial_accumulate.hpp new file mode 100644 index 0000000000..84f9910122 --- /dev/null +++ b/boost/compute/algorithm/detail/serial_accumulate.hpp @@ -0,0 +1,56 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_ACCUMULATE_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_ACCUMULATE_HPP + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class InputIterator, class OutputIterator, class T, class BinaryFunction> +inline void serial_accumulate(InputIterator first, + InputIterator last, + OutputIterator result, + T init, + BinaryFunction function, + command_queue &queue) +{ + const context &context = queue.get_context(); + size_t count = detail::iterator_range_size(first, last); + + meta_kernel k("serial_accumulate"); + size_t init_arg = k.add_arg<T>("init"); + size_t count_arg = k.add_arg<cl_uint>("count"); + + k << + k.decl<T>("result") << " = init;\n" << + "for(uint i = 0; i < count; i++)\n" << + " result = " << function(k.var<T>("result"), + first[k.var<cl_uint>("i")]) << ";\n" << + result[0] << " = result;\n"; + + kernel kernel = k.compile(context); + + kernel.set_arg(init_arg, init); + kernel.set_arg(count_arg, static_cast<cl_uint>(count)); + + queue.enqueue_task(kernel); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif 
// BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_ACCUMULATE_HPP diff --git a/boost/compute/algorithm/detail/serial_count_if.hpp b/boost/compute/algorithm/detail/serial_count_if.hpp new file mode 100644 index 0000000000..be6794c426 --- /dev/null +++ b/boost/compute/algorithm/detail/serial_count_if.hpp @@ -0,0 +1,68 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_COUNT_IF_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_COUNT_IF_HPP + +#include <iterator> + +#include <boost/compute/container/detail/scalar.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> + +namespace boost { +namespace compute { +namespace detail { + +// counts values that match the predicate using a single thread +template<class InputIterator, class Predicate> +inline size_t serial_count_if(InputIterator first, + InputIterator last, + Predicate predicate, + command_queue &queue) +{ + typedef typename std::iterator_traits<InputIterator>::value_type value_type; + + const context &context = queue.get_context(); + size_t size = iterator_range_size(first, last); + + meta_kernel k("serial_count_if"); + k.add_set_arg("size", static_cast<uint_>(size)); + size_t result_arg = k.add_arg<uint_ *>(memory_object::global_memory, "result"); + + k << + "uint count = 0;\n" << + "for(uint i = 0; i < size; i++){\n" << + k.decl<const value_type>("value") << "=" + << first[k.var<uint_>("i")] << ";\n" << + "if(" << predicate(k.var<const value_type>("value")) << "){\n" << + "count++;\n" << + "}\n" + "}\n" + "*result = 
count;\n"; + + kernel kernel = k.compile(context); + + // setup result buffer + scalar<uint_> result(context); + kernel.set_arg(result_arg, result.get_buffer()); + + // run kernel + queue.enqueue_task(kernel); + + // read index + return result.read(queue); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_COUNT_IF_HPP diff --git a/boost/compute/algorithm/detail/serial_find_extrema.hpp b/boost/compute/algorithm/detail/serial_find_extrema.hpp new file mode 100644 index 0000000000..8407c88129 --- /dev/null +++ b/boost/compute/algorithm/detail/serial_find_extrema.hpp @@ -0,0 +1,87 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_FIND_EXTREMA_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_FIND_EXTREMA_HPP + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/types/fundamental.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/container/detail/scalar.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class InputIterator, class Compare> +inline InputIterator serial_find_extrema(InputIterator first, + InputIterator last, + Compare compare, + const bool find_minimum, + command_queue &queue) +{ + typedef typename std::iterator_traits<InputIterator>::value_type value_type; + typedef typename std::iterator_traits<InputIterator>::difference_type difference_type; + + const context &context = queue.get_context(); + + meta_kernel k("serial_find_extrema"); + + k << + k.decl<value_type>("value") << " = " << first[k.expr<uint_>("0")] << ";\n" << + k.decl<uint_>("value_index") << " = 0;\n" << + "for(uint i = 1; i < size; i++){\n" << + " " << k.decl<value_type>("candidate") << "=" + << first[k.expr<uint_>("i")] << ";\n" << + + "#ifndef BOOST_COMPUTE_FIND_MAXIMUM\n" << + " if(" << compare(k.var<value_type>("candidate"), + k.var<value_type>("value")) << "){\n" << + "#else\n" << + " if(" << compare(k.var<value_type>("value"), + k.var<value_type>("candidate")) << "){\n" << + "#endif\n" << + + " value = candidate;\n" << + " value_index = i;\n" << + " }\n" << + "}\n" << + "*index = value_index;\n"; + + size_t index_arg_index = k.add_arg<uint_ *>(memory_object::global_memory, "index"); + size_t size_arg_index = k.add_arg<uint_>("size"); + + std::string options; + if(!find_minimum){ + options = "-DBOOST_COMPUTE_FIND_MAXIMUM"; + } + kernel kernel = k.compile(context, options); + + // setup index buffer + scalar<uint_> index(context); + 
kernel.set_arg(index_arg_index, index.get_buffer()); + + // setup count + size_t count = iterator_range_size(first, last); + kernel.set_arg(size_arg_index, static_cast<uint_>(count)); + + // run kernel + queue.enqueue_task(kernel); + + // read index and return iterator + return first + static_cast<difference_type>(index.read(queue)); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_FIND_EXTREMA_HPP diff --git a/boost/compute/algorithm/detail/serial_merge.hpp b/boost/compute/algorithm/detail/serial_merge.hpp new file mode 100644 index 0000000000..85e38f704c --- /dev/null +++ b/boost/compute/algorithm/detail/serial_merge.hpp @@ -0,0 +1,97 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_SERIAL_MERGE_HPP +#define BOOST_COMPUTE_ALGORITHM_SERIAL_MERGE_HPP + +#include <iterator> + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class InputIterator1, + class InputIterator2, + class OutputIterator, + class Compare> +inline OutputIterator serial_merge(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + Compare comp, + command_queue &queue) +{ + typedef typename + std::iterator_traits<InputIterator1>::value_type + input_type1; + typedef typename + std::iterator_traits<InputIterator2>::value_type + input_type2; + typedef typename + std::iterator_traits<OutputIterator>::difference_type + result_difference_type; + + std::ptrdiff_t size1 = std::distance(first1, last1); + std::ptrdiff_t size2 = std::distance(first2, last2); + + meta_kernel k("serial_merge"); + k.add_set_arg<uint_>("size1", static_cast<uint_>(size1)); + k.add_set_arg<uint_>("size2", static_cast<uint_>(size2)); + + k << + "uint i = 0;\n" << // index in result range + "uint j = 0;\n" << // index in first input range + "uint k = 0;\n" << // index in second input range + + // fetch initial values from each range + k.decl<input_type1>("j_value") << " = " << first1[0] << ";\n" << + k.decl<input_type2>("k_value") << " = " << first2[0] << ";\n" << + + // merge values from both input ranges to the result range + "while(j < size1 && k < size2){\n" << + " if(" << comp(k.var<input_type1>("j_value"), + k.var<input_type2>("k_value")) << "){\n" << + " " << result[k.var<uint_>("i++")] << " = j_value;\n" << + " j_value = " << first1[k.var<uint_>("++j")] << ";\n" << + " }\n" << + " else{\n" + " " << result[k.var<uint_>("i++")] << " = k_value;\n" + " 
k_value = " << first2[k.var<uint_>("++k")] << ";\n" << + " }\n" + "}\n" + + // copy any remaining values from first range + "while(j < size1){\n" << + result[k.var<uint_>("i++")] << " = " << + first1[k.var<uint_>("j++")] << ";\n" << + "}\n" + + // copy any remaining values from second range + "while(k < size2){\n" << + result[k.var<uint_>("i++")] << " = " << + first2[k.var<uint_>("k++")] << ";\n" << + "}\n"; + + // run kernel + k.exec(queue); + + return result + static_cast<result_difference_type>(size1 + size2); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_SERIAL_MERGE_HPP diff --git a/boost/compute/algorithm/detail/serial_reduce.hpp b/boost/compute/algorithm/detail/serial_reduce.hpp new file mode 100644 index 0000000000..53aaf140fe --- /dev/null +++ b/boost/compute/algorithm/detail/serial_reduce.hpp @@ -0,0 +1,62 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_REDUCE_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_REDUCE_HPP + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/type_traits/result_of.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class InputIterator, class OutputIterator, class BinaryFunction> +inline void serial_reduce(InputIterator first, + InputIterator last, + OutputIterator result, + BinaryFunction function, + command_queue &queue) +{ + typedef typename + std::iterator_traits<InputIterator>::value_type T; + typedef typename + ::boost::compute::result_of<BinaryFunction(T, T)>::type result_type; + + const context &context = queue.get_context(); + size_t count = detail::iterator_range_size(first, last); + if(count == 0){ + return; + } + + meta_kernel k("serial_reduce"); + size_t count_arg = k.add_arg<cl_uint>("count"); + + k << + k.decl<result_type>("result") << " = " << first[0] << ";\n" << + "for(uint i = 1; i < count; i++)\n" << + " result = " << function(k.var<T>("result"), + first[k.var<uint_>("i")]) << ";\n" << + result[0] << " = result;\n"; + + kernel kernel = k.compile(context); + + kernel.set_arg(count_arg, static_cast<uint_>(count)); + + queue.enqueue_task(kernel); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_REDUCE_HPP diff --git a/boost/compute/algorithm/detail/serial_reduce_by_key.hpp b/boost/compute/algorithm/detail/serial_reduce_by_key.hpp new file mode 100644 index 0000000000..f9bda8e476 --- /dev/null +++ b/boost/compute/algorithm/detail/serial_reduce_by_key.hpp @@ -0,0 +1,108 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com> 
+// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_REDUCE_BY_KEY_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_REDUCE_BY_KEY_HPP + +#include <iterator> + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/functional.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/container/detail/scalar.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/type_traits/result_of.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class InputKeyIterator, class InputValueIterator, + class OutputKeyIterator, class OutputValueIterator, + class BinaryFunction, class BinaryPredicate> +inline size_t serial_reduce_by_key(InputKeyIterator keys_first, + InputKeyIterator keys_last, + InputValueIterator values_first, + OutputKeyIterator keys_result, + OutputValueIterator values_result, + BinaryFunction function, + BinaryPredicate predicate, + command_queue &queue) +{ + typedef typename + std::iterator_traits<InputValueIterator>::value_type value_type; + typedef typename + std::iterator_traits<InputKeyIterator>::value_type key_type; + typedef typename + ::boost::compute::result_of<BinaryFunction(value_type, value_type)>::type result_type; + + const context &context = queue.get_context(); + size_t count = detail::iterator_range_size(keys_first, keys_last); + if(count < 1){ + return count; + } + + meta_kernel k("serial_reduce_by_key"); + size_t count_arg = k.add_arg<uint_>("count"); + size_t result_size_arg = k.add_arg<uint_ *>(memory_object::global_memory, + "result_size"); + + convert<result_type> to_result_type; + + k << + 
k.decl<result_type>("result") << + " = " << to_result_type(values_first[0]) << ";\n" << + k.decl<key_type>("previous_key") << " = " << keys_first[0] << ";\n" << + k.decl<result_type>("value") << ";\n" << + k.decl<key_type>("key") << ";\n" << + + k.decl<uint_>("size") << " = 1;\n" << + + keys_result[0] << " = previous_key;\n" << + values_result[0] << " = result;\n" << + + "for(ulong i = 1; i < count; i++) {\n" << + " value = " << to_result_type(values_first[k.var<uint_>("i")]) << ";\n" << + " key = " << keys_first[k.var<uint_>("i")] << ";\n" << + " if (" << predicate(k.var<key_type>("previous_key"), + k.var<key_type>("key")) << ") {\n" << + + " result = " << function(k.var<result_type>("result"), + k.var<result_type>("value")) << ";\n" << + " }\n " << + " else { \n" << + keys_result[k.var<uint_>("size - 1")] << " = previous_key;\n" << + values_result[k.var<uint_>("size - 1")] << " = result;\n" << + " result = value;\n" << + " size++;\n" << + " } \n" << + " previous_key = key;\n" << + "}\n" << + keys_result[k.var<uint_>("size - 1")] << " = previous_key;\n" << + values_result[k.var<uint_>("size - 1")] << " = result;\n" << + "*result_size = size;"; + + kernel kernel = k.compile(context); + + scalar<uint_> result_size(context); + kernel.set_arg(result_size_arg, result_size.get_buffer()); + kernel.set_arg(count_arg, static_cast<uint_>(count)); + + queue.enqueue_task(kernel); + + return static_cast<size_t>(result_size.read(queue)); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_REDUCE_BY_KEY_HPP diff --git a/boost/compute/algorithm/equal.hpp b/boost/compute/algorithm/equal.hpp new file mode 100644 index 0000000000..35d0c5f0ea --- /dev/null +++ b/boost/compute/algorithm/equal.hpp @@ -0,0 +1,53 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, 
Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_EQUAL_HPP +#define BOOST_COMPUTE_ALGORITHM_EQUAL_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/mismatch.hpp> + +namespace boost { +namespace compute { + +/// Returns \c true if the range [\p first1, \p last1) and the range +/// beginning at \p first2 are equal. +template<class InputIterator1, class InputIterator2> +inline bool equal(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + command_queue &queue = system::default_queue()) +{ + return ::boost::compute::mismatch(first1, + last1, + first2, + queue).first == last1; +} + +/// \overload +template<class InputIterator1, class InputIterator2> +inline bool equal(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + command_queue &queue = system::default_queue()) +{ + if(std::distance(first1, last1) != std::distance(first2, last2)){ + return false; + } + + return ::boost::compute::equal(first1, last1, first2, queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_EQUAL_HPP diff --git a/boost/compute/algorithm/equal_range.hpp b/boost/compute/algorithm/equal_range.hpp new file mode 100644 index 0000000000..fd82177324 --- /dev/null +++ b/boost/compute/algorithm/equal_range.hpp @@ -0,0 +1,42 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more 
information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_EQUAL_RANGE_HPP +#define BOOST_COMPUTE_ALGORITHM_EQUAL_RANGE_HPP + +#include <utility> + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/lower_bound.hpp> +#include <boost/compute/algorithm/upper_bound.hpp> + +namespace boost { +namespace compute { + +/// Returns a pair of iterators containing the range of values equal +/// to \p value in the sorted range [\p first, \p last). +template<class InputIterator, class T> +inline std::pair<InputIterator, InputIterator> +equal_range(InputIterator first, + InputIterator last, + const T &value, + command_queue &queue = system::default_queue()) +{ + return std::make_pair( + ::boost::compute::lower_bound(first, last, value, queue), + ::boost::compute::upper_bound(first, last, value, queue) + ); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_EQUAL_RANGE_HPP diff --git a/boost/compute/algorithm/exclusive_scan.hpp b/boost/compute/algorithm/exclusive_scan.hpp new file mode 100644 index 0000000000..205d3de658 --- /dev/null +++ b/boost/compute/algorithm/exclusive_scan.hpp @@ -0,0 +1,96 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_EXCLUSIVE_SCAN_HPP +#define BOOST_COMPUTE_ALGORITHM_EXCLUSIVE_SCAN_HPP + +#include <boost/compute/functional.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/detail/scan.hpp> + +namespace boost { +namespace compute { + +/// Performs an exclusive scan of the elements in the range [\p first, \p last) +/// and stores the results in the range beginning at \p result. +/// +/// Each element in the output is assigned to the sum of all the previous +/// values in the input. +/// +/// \param first first element in the range to scan +/// \param last last element in the range to scan +/// \param result first element in the result range +/// \param init value used to initialize the scan sequence +/// \param binary_op associative binary operator +/// \param queue command queue to perform the operation +/// +/// \return \c OutputIterator to the end of the result range +/// +/// The default operation is to add the elements up. +/// +/// \snippet test/test_scan.cpp exclusive_scan_int +/// +/// But different associative operation can be specified as \p binary_op +/// instead (e.g., multiplication, maximum, minimum). Also value used to +/// initialized the scan sequence can be specified. 
+/// +/// \snippet test/test_scan.cpp exclusive_scan_int_multiplies +/// +/// \see inclusive_scan() +template<class InputIterator, class OutputIterator, class T, class BinaryOperator> +inline OutputIterator +exclusive_scan(InputIterator first, + InputIterator last, + OutputIterator result, + T init, + BinaryOperator binary_op, + command_queue &queue = system::default_queue()) +{ + return detail::scan(first, last, result, true, init, binary_op, queue); +} + +/// \overload +template<class InputIterator, class OutputIterator, class T> +inline OutputIterator +exclusive_scan(InputIterator first, + InputIterator last, + OutputIterator result, + T init, + command_queue &queue = system::default_queue()) +{ + typedef typename + std::iterator_traits<OutputIterator>::value_type output_type; + + return detail::scan(first, last, result, true, + init, boost::compute::plus<output_type>(), + queue); +} + +/// \overload +template<class InputIterator, class OutputIterator> +inline OutputIterator +exclusive_scan(InputIterator first, + InputIterator last, + OutputIterator result, + command_queue &queue = system::default_queue()) +{ + typedef typename + std::iterator_traits<OutputIterator>::value_type output_type; + + return detail::scan(first, last, result, true, + output_type(0), boost::compute::plus<output_type>(), + queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_EXCLUSIVE_SCAN_HPP diff --git a/boost/compute/algorithm/fill.hpp b/boost/compute/algorithm/fill.hpp new file mode 100644 index 0000000000..c711f46b94 --- /dev/null +++ b/boost/compute/algorithm/fill.hpp @@ -0,0 +1,306 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more 
information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_FILL_HPP +#define BOOST_COMPUTE_ALGORITHM_FILL_HPP + +#include <iterator> + +#include <boost/mpl/int.hpp> +#include <boost/mpl/vector.hpp> +#include <boost/mpl/contains.hpp> +#include <boost/utility/enable_if.hpp> + +#include <boost/compute/cl.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/async/future.hpp> +#include <boost/compute/iterator/constant_iterator.hpp> +#include <boost/compute/iterator/discard_iterator.hpp> +#include <boost/compute/detail/is_buffer_iterator.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> + +namespace boost { +namespace compute { +namespace detail { + +namespace mpl = boost::mpl; + +// fills the range [first, first + count) with value using copy() +template<class BufferIterator, class T> +inline void fill_with_copy(BufferIterator first, + size_t count, + const T &value, + command_queue &queue) +{ + ::boost::compute::copy( + ::boost::compute::make_constant_iterator(value, 0), + ::boost::compute::make_constant_iterator(value, count), + first, + queue + ); +} + +// fills the range [first, first + count) with value using copy_async() +template<class BufferIterator, class T> +inline future<void> fill_async_with_copy(BufferIterator first, + size_t count, + const T &value, + command_queue &queue) +{ + return ::boost::compute::copy_async( + ::boost::compute::make_constant_iterator(value, 0), + ::boost::compute::make_constant_iterator(value, count), + first, + queue + ); +} + +#if defined(CL_VERSION_1_2) + +// meta-function returing true if Iterator points to a range of values +// that can be filled using clEnqueueFillBuffer(). to meet this criteria +// it must have a buffer accessible through iter.get_buffer() and the +// size of its value_type must by in {1, 2, 4, 8, 16, 32, 64, 128}. 
+template<class Iterator> +struct is_valid_fill_buffer_iterator : + public mpl::and_< + is_buffer_iterator<Iterator>, + mpl::contains< + mpl::vector< + mpl::int_<1>, + mpl::int_<2>, + mpl::int_<4>, + mpl::int_<8>, + mpl::int_<16>, + mpl::int_<32>, + mpl::int_<64>, + mpl::int_<128> + >, + mpl::int_< + sizeof(typename std::iterator_traits<Iterator>::value_type) + > + > + >::type { }; + +template<> +struct is_valid_fill_buffer_iterator<discard_iterator> : public boost::false_type {}; + +// specialization which uses clEnqueueFillBuffer for buffer iterators +template<class BufferIterator, class T> +inline void +dispatch_fill(BufferIterator first, + size_t count, + const T &value, + command_queue &queue, + typename boost::enable_if< + is_valid_fill_buffer_iterator<BufferIterator> + >::type* = 0) +{ + typedef typename std::iterator_traits<BufferIterator>::value_type value_type; + + if(count == 0){ + // nothing to do + return; + } + + // check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer) + if(!queue.check_device_version(1, 2)){ + return fill_with_copy(first, count, value, queue); + } + + value_type pattern = static_cast<value_type>(value); + size_t offset = static_cast<size_t>(first.get_index()); + + if(count == 1){ + // use clEnqueueWriteBuffer() directly when writing a single value + // to the device buffer. this is potentially more efficient and also + // works around a bug in the intel opencl driver. 
+ queue.enqueue_write_buffer( + first.get_buffer(), + offset * sizeof(value_type), + sizeof(value_type), + &pattern + ); + } + else { + queue.enqueue_fill_buffer( + first.get_buffer(), + &pattern, + sizeof(value_type), + offset * sizeof(value_type), + count * sizeof(value_type) + ); + } +} + +template<class BufferIterator, class T> +inline future<void> +dispatch_fill_async(BufferIterator first, + size_t count, + const T &value, + command_queue &queue, + typename boost::enable_if< + is_valid_fill_buffer_iterator<BufferIterator> + >::type* = 0) +{ + typedef typename std::iterator_traits<BufferIterator>::value_type value_type; + + // check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer) + if(!queue.check_device_version(1, 2)){ + return fill_async_with_copy(first, count, value, queue); + } + + value_type pattern = static_cast<value_type>(value); + size_t offset = static_cast<size_t>(first.get_index()); + + event event_ = + queue.enqueue_fill_buffer(first.get_buffer(), + &pattern, + sizeof(value_type), + offset * sizeof(value_type), + count * sizeof(value_type)); + + return future<void>(event_); +} + +#ifdef CL_VERSION_2_0 +// specializations for svm_ptr<T> +template<class T> +inline void dispatch_fill(svm_ptr<T> first, + size_t count, + const T &value, + command_queue &queue) +{ + if(count == 0){ + return; + } + + queue.enqueue_svm_fill( + first.get(), &value, sizeof(T), count * sizeof(T) + ); +} + +template<class T> +inline future<void> dispatch_fill_async(svm_ptr<T> first, + size_t count, + const T &value, + command_queue &queue) +{ + if(count == 0){ + return future<void>(); + } + + event event_ = queue.enqueue_svm_fill( + first.get(), &value, sizeof(T), count * sizeof(T) + ); + + return future<void>(event_); +} +#endif // CL_VERSION_2_0 + +// default implementations +template<class BufferIterator, class T> +inline void +dispatch_fill(BufferIterator first, + size_t count, + const T &value, + command_queue &queue, + typename boost::disable_if< + 
is_valid_fill_buffer_iterator<BufferIterator> + >::type* = 0) +{ + fill_with_copy(first, count, value, queue); +} + +template<class BufferIterator, class T> +inline future<void> +dispatch_fill_async(BufferIterator first, + size_t count, + const T &value, + command_queue &queue, + typename boost::disable_if< + is_valid_fill_buffer_iterator<BufferIterator> + >::type* = 0) +{ + return fill_async_with_copy(first, count, value, queue); +} +#else +template<class BufferIterator, class T> +inline void dispatch_fill(BufferIterator first, + size_t count, + const T &value, + command_queue &queue) +{ + fill_with_copy(first, count, value, queue); +} + +template<class BufferIterator, class T> +inline future<void> dispatch_fill_async(BufferIterator first, + size_t count, + const T &value, + command_queue &queue) +{ + return fill_async_with_copy(first, count, value, queue); +} +#endif // !defined(CL_VERSION_1_2) + +} // end detail namespace + +/// Fills the range [\p first, \p last) with \p value. +/// +/// \param first first element in the range to fill +/// \param last last element in the range to fill +/// \param value value to copy to each element +/// \param queue command queue to perform the operation +/// +/// For example, to fill a vector on the device with sevens: +/// \code +/// // vector on the device +/// boost::compute::vector<int> vec(10, context); +/// +/// // fill vector with sevens +/// boost::compute::fill(vec.begin(), vec.end(), 7, queue); +/// \endcode +/// +/// \see boost::compute::fill_n() +template<class BufferIterator, class T> +inline void fill(BufferIterator first, + BufferIterator last, + const T &value, + command_queue &queue = system::default_queue()) +{ + size_t count = detail::iterator_range_size(first, last); + if(count == 0){ + return; + } + + detail::dispatch_fill(first, count, value, queue); +} + +template<class BufferIterator, class T> +inline future<void> fill_async(BufferIterator first, + BufferIterator last, + const T &value, + command_queue 
&queue = system::default_queue()) +{ + size_t count = detail::iterator_range_size(first, last); + if(count == 0){ + return future<void>(); + } + + return detail::dispatch_fill_async(first, count, value, queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_FILL_HPP diff --git a/boost/compute/algorithm/fill_n.hpp b/boost/compute/algorithm/fill_n.hpp new file mode 100644 index 0000000000..18a8f706a5 --- /dev/null +++ b/boost/compute/algorithm/fill_n.hpp @@ -0,0 +1,36 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_FILL_N_HPP +#define BOOST_COMPUTE_ALGORITHM_FILL_N_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/fill.hpp> + +namespace boost { +namespace compute { + +/// Fills the range [\p first, \p first + count) with \p value. 
+/// +/// \see fill() +template<class BufferIterator, class Size, class T> +inline void fill_n(BufferIterator first, + Size count, + const T &value, + command_queue &queue = system::default_queue()) +{ + ::boost::compute::fill(first, first + count, value, queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_FILL_N_HPP diff --git a/boost/compute/algorithm/find.hpp b/boost/compute/algorithm/find.hpp new file mode 100644 index 0000000000..ef3ebf0c47 --- /dev/null +++ b/boost/compute/algorithm/find.hpp @@ -0,0 +1,57 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_FIND_HPP +#define BOOST_COMPUTE_ALGORITHM_FIND_HPP + +#include <boost/compute/lambda.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/find_if.hpp> +#include <boost/compute/type_traits/vector_size.hpp> + +namespace boost { +namespace compute { + +/// Returns an iterator pointing to the first element in the range +/// [\p first, \p last) that equals \p value. 
+template<class InputIterator, class T> +inline InputIterator find(InputIterator first, + InputIterator last, + const T &value, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator>::value_type value_type; + + using ::boost::compute::_1; + using ::boost::compute::lambda::all; + + if(vector_size<value_type>::value == 1){ + return ::boost::compute::find_if( + first, + last, + _1 == value, + queue + ); + } + else { + return ::boost::compute::find_if( + first, + last, + all(_1 == value), + queue + ); + } +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_FIND_HPP diff --git a/boost/compute/algorithm/find_end.hpp b/boost/compute/algorithm/find_end.hpp new file mode 100644 index 0000000000..5c40055113 --- /dev/null +++ b/boost/compute/algorithm/find_end.hpp @@ -0,0 +1,119 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_FIND_END_HPP +#define BOOST_COMPUTE_ALGORITHM_FIND_END_HPP + +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/algorithm/detail/search_all.hpp> +#include <boost/compute/container/detail/scalar.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/system.hpp> + +namespace boost { +namespace compute { +namespace detail { + +/// +/// \brief Helper function for find_end +/// +/// Basically a copy of find_if which returns last occurence +/// instead of first occurence +/// +template<class InputIterator, class UnaryPredicate> +inline InputIterator find_end_helper(InputIterator first, + InputIterator last, + UnaryPredicate predicate, + command_queue &queue) +{ + typedef typename std::iterator_traits<InputIterator>::value_type value_type; + + size_t count = detail::iterator_range_size(first, last); + if(count == 0){ + return last; + } + + const context &context = queue.get_context(); + + detail::meta_kernel k("find_end"); + size_t index_arg = k.add_arg<int *>(memory_object::global_memory, "index"); + atomic_max<int_> atomic_max_int; + + k << k.decl<const int_>("i") << " = get_global_id(0);\n" + << k.decl<const value_type>("value") << "=" + << first[k.var<const int_>("i")] << ";\n" + << "if(" << predicate(k.var<const value_type>("value")) << "){\n" + << " " << atomic_max_int(k.var<int_ *>("index"), k.var<int_>("i")) << ";\n" + << "}\n"; + + kernel kernel = k.compile(context); + + scalar<int_> index(context); + kernel.set_arg(index_arg, index.get_buffer()); + + index.write(static_cast<int_>(-1), queue); + + queue.enqueue_1d_range_kernel(kernel, 0, count, 0); + + int result = static_cast<int>(index.read(queue)); + if(result == -1) return last; + else return first + result; +} + +} // end detail namespace + +/// +/// 
\brief Substring matching algorithm +/// +/// Searches for the last match of the pattern [p_first, p_last) +/// in text [t_first, t_last). +/// \return Iterator pointing to beginning of last occurence +/// +/// \param t_first Iterator pointing to start of text +/// \param t_last Iterator pointing to end of text +/// \param p_first Iterator pointing to start of pattern +/// \param p_last Iterator pointing to end of pattern +/// \param queue Queue on which to execute +/// +template<class TextIterator, class PatternIterator> +inline TextIterator find_end(TextIterator t_first, + TextIterator t_last, + PatternIterator p_first, + PatternIterator p_last, + command_queue &queue = system::default_queue()) +{ + const context &context = queue.get_context(); + vector<uint_> matching_indices(detail::iterator_range_size(t_first, t_last), + context); + + detail::search_kernel<PatternIterator, + TextIterator, + vector<uint_>::iterator> kernel; + + kernel.set_range(p_first, p_last, t_first, t_last, matching_indices.begin()); + kernel.exec(queue); + + using boost::compute::_1; + + vector<uint_>::iterator index = + detail::find_end_helper(matching_indices.begin(), + matching_indices.end(), + _1 == 1, + queue); + + return t_first + detail::iterator_range_size(matching_indices.begin(), index); +} + +} //end compute namespace +} //end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_FIND_END_HPP diff --git a/boost/compute/algorithm/find_if.hpp b/boost/compute/algorithm/find_if.hpp new file mode 100644 index 0000000000..db99cc0396 --- /dev/null +++ b/boost/compute/algorithm/find_if.hpp @@ -0,0 +1,35 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_FIND_IF_HPP +#define BOOST_COMPUTE_ALGORITHM_FIND_IF_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/detail/find_if_with_atomics.hpp> + +namespace boost { +namespace compute { + +/// Returns an iterator pointing to the first element in the range +/// [\p first, \p last) for which \p predicate returns \c true. +template<class InputIterator, class UnaryPredicate> +inline InputIterator find_if(InputIterator first, + InputIterator last, + UnaryPredicate predicate, + command_queue &queue = system::default_queue()) +{ + return detail::find_if_with_atomics(first, last, predicate, queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_FIND_IF_HPP diff --git a/boost/compute/algorithm/find_if_not.hpp b/boost/compute/algorithm/find_if_not.hpp new file mode 100644 index 0000000000..61de050d31 --- /dev/null +++ b/boost/compute/algorithm/find_if_not.hpp @@ -0,0 +1,43 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_FIND_IF_NOT_HPP +#define BOOST_COMPUTE_ALGORITHM_FIND_IF_NOT_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/functional.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/find_if.hpp> + +namespace boost { +namespace compute { + +/// Returns an iterator pointing to the first element in the range +/// [\p first, \p last) for which \p predicate returns \c false. +/// +/// \see find_if() +template<class InputIterator, class UnaryPredicate> +inline InputIterator find_if_not(InputIterator first, + InputIterator last, + UnaryPredicate predicate, + command_queue &queue = system::default_queue()) +{ + return ::boost::compute::find_if( + first, + last, + not1(predicate), + queue + ); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_FIND_IF_NOT_HPP diff --git a/boost/compute/algorithm/for_each.hpp b/boost/compute/algorithm/for_each.hpp new file mode 100644 index 0000000000..3ed399e6e9 --- /dev/null +++ b/boost/compute/algorithm/for_each.hpp @@ -0,0 +1,65 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_FOR_EACH_HPP +#define BOOST_COMPUTE_ALGORITHM_FOR_EACH_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class InputIterator, class Function> +struct for_each_kernel : public meta_kernel +{ + for_each_kernel(InputIterator first, InputIterator last, Function function) + : meta_kernel("for_each") + { + // store range size + m_count = detail::iterator_range_size(first, last); + + // setup kernel source + *this << function(first[get_global_id(0)]) << ";\n"; + } + + void exec(command_queue &queue) + { + exec_1d(queue, 0, m_count); + } + + size_t m_count; +}; + +} // end detail namespace + +/// Calls \p function on each element in the range [\p first, \p last). +/// +/// \see transform() +template<class InputIterator, class UnaryFunction> +inline UnaryFunction for_each(InputIterator first, + InputIterator last, + UnaryFunction function, + command_queue &queue = system::default_queue()) +{ + detail::for_each_kernel<InputIterator, UnaryFunction> kernel(first, last, function); + + kernel.exec(queue); + + return function; +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_FOR_EACH_HPP diff --git a/boost/compute/algorithm/for_each_n.hpp b/boost/compute/algorithm/for_each_n.hpp new file mode 100644 index 0000000000..d0be784bf7 --- /dev/null +++ b/boost/compute/algorithm/for_each_n.hpp @@ -0,0 +1,35 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// 
See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_FOR_EACH_N_HPP +#define BOOST_COMPUTE_ALGORITHM_FOR_EACH_N_HPP + +#include <boost/compute/algorithm/for_each.hpp> + +namespace boost { +namespace compute { + +/// Calls \p function on each element in the range [\p first, \p first +/// \c + \p count). +/// +/// \see for_each() +template<class InputIterator, class Size, class UnaryFunction> +inline UnaryFunction for_each_n(InputIterator first, + Size count, + UnaryFunction function, + command_queue &queue = system::default_queue()) +{ + return ::boost::compute::for_each(first, first + count, function, queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_FOR_EACH_N_HPP diff --git a/boost/compute/algorithm/gather.hpp b/boost/compute/algorithm/gather.hpp new file mode 100644 index 0000000000..b2f725d54e --- /dev/null +++ b/boost/compute/algorithm/gather.hpp @@ -0,0 +1,84 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_GATHER_HPP +#define BOOST_COMPUTE_ALGORITHM_GATHER_HPP + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/exception.hpp> +#include <boost/compute/iterator/buffer_iterator.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/type_traits/type_name.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class InputIterator, class MapIterator, class OutputIterator> +class gather_kernel : public meta_kernel +{ +public: + gather_kernel() : meta_kernel("gather") + {} + + void set_range(MapIterator first, + MapIterator last, + InputIterator input, + OutputIterator result) + { + m_count = iterator_range_size(first, last); + m_offset = first.get_index(); + + *this << + "const uint i = get_global_id(0);\n" << + result[expr<uint_>("i")] << "=" << + input[first[expr<uint_>("i")]] << ";\n"; + } + + event exec(command_queue &queue) + { + if(m_count == 0) { + return event(); + } + + return exec_1d(queue, m_offset, m_count); + } + +private: + size_t m_count; + size_t m_offset; +}; + +} // end detail namespace + +/// Copies the elements using the indices from the range [\p first, \p last) +/// to the range beginning at \p result using the input values from the range +/// beginning at \p input. 
+/// +/// \see scatter() +template<class InputIterator, class MapIterator, class OutputIterator> +inline void gather(MapIterator first, + MapIterator last, + InputIterator input, + OutputIterator result, + command_queue &queue = system::default_queue()) +{ + detail::gather_kernel<InputIterator, MapIterator, OutputIterator> kernel; + + kernel.set_range(first, last, input, result); + kernel.exec(queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_GATHER_HPP diff --git a/boost/compute/algorithm/generate.hpp b/boost/compute/algorithm/generate.hpp new file mode 100644 index 0000000000..c70a542683 --- /dev/null +++ b/boost/compute/algorithm/generate.hpp @@ -0,0 +1,49 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_GENERATE_HPP +#define BOOST_COMPUTE_ALGORITHM_GENERATE_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/iterator/function_input_iterator.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> + +namespace boost { +namespace compute { + +/// Stores the result of \p generator for each element in the range +/// [\p first, \p last). 
+template<class OutputIterator, class Generator> +inline void generate(OutputIterator first, + OutputIterator last, + Generator generator, + command_queue &queue = system::default_queue()) +{ + size_t count = detail::iterator_range_size(first, last); + if(count == 0){ + return; + } + + ::boost::compute::copy( + ::boost::compute::make_function_input_iterator(generator, + first.get_index()), + ::boost::compute::make_function_input_iterator(generator, + last.get_index()), + first, + queue + ); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_GENERATE_HPP diff --git a/boost/compute/algorithm/generate_n.hpp b/boost/compute/algorithm/generate_n.hpp new file mode 100644 index 0000000000..6d8e607b64 --- /dev/null +++ b/boost/compute/algorithm/generate_n.hpp @@ -0,0 +1,35 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_GENERATE_N_HPP +#define BOOST_COMPUTE_ALGORITHM_GENERATE_N_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/generate.hpp> + +namespace boost { +namespace compute { + +/// Stores the result of \p generator for each element in the range +/// [\p first, \p first + \p count). 
+template<class OutputIterator, class Size, class Generator> +inline void generate_n(OutputIterator first, + Size count, + Generator generator, + command_queue &queue = system::default_queue()) +{ + ::boost::compute::generate(first, first + count, generator, queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_GENERATE_N_HPP diff --git a/boost/compute/algorithm/includes.hpp b/boost/compute/algorithm/includes.hpp new file mode 100644 index 0000000000..c4e7c793e7 --- /dev/null +++ b/boost/compute/algorithm/includes.hpp @@ -0,0 +1,155 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_INCLUDES_HPP +#define BOOST_COMPUTE_ALGORITHM_INCLUDES_HPP + +#include <iterator> + +#include <boost/compute/algorithm/detail/balanced_path.hpp> +#include <boost/compute/algorithm/fill_n.hpp> +#include <boost/compute/algorithm/find.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/detail/read_write_single_value.hpp> +#include <boost/compute/system.hpp> + +namespace boost { +namespace compute { +namespace detail { + +/// +/// \brief Serial includes kernel class +/// +/// Subclass of meta_kernel to perform includes operation after tiling +/// +class serial_includes_kernel : meta_kernel +{ +public: + + serial_includes_kernel() : meta_kernel("includes") + { + + } + + template<class InputIterator1, class InputIterator2, + class InputIterator3, class InputIterator4, + class 
OutputIterator> + void set_range(InputIterator1 first1, + InputIterator2 first2, + InputIterator3 tile_first1, + InputIterator3 tile_last1, + InputIterator4 tile_first2, + OutputIterator result) + { + m_count = iterator_range_size(tile_first1, tile_last1) - 1; + + *this << + "uint i = get_global_id(0);\n" << + "uint start1 = " << tile_first1[expr<uint_>("i")] << ";\n" << + "uint end1 = " << tile_first1[expr<uint_>("i+1")] << ";\n" << + "uint start2 = " << tile_first2[expr<uint_>("i")] << ";\n" << + "uint end2 = " << tile_first2[expr<uint_>("i+1")] << ";\n" << + "uint includes = 1;\n" << + "while(start1<end1 && start2<end2)\n" << + "{\n" << + " if(" << first1[expr<uint_>("start1")] << " == " << + first2[expr<uint_>("start2")] << ")\n" << + " {\n" << + " start1++; start2++;\n" << + " }\n" << + " else if(" << first1[expr<uint_>("start1")] << " < " << + first2[expr<uint_>("start2")] << ")\n" << + " start1++;\n" << + " else\n" << + " {\n" << + " includes = 0;\n" << + " break;\n" << + " }\n" << + "}\n" << + "if(start2<end2)\n" << + " includes = 0;\n" << + result[expr<uint_>("i")] << " = includes;\n"; + } + + event exec(command_queue &queue) + { + if(m_count == 0) { + return event(); + } + + return exec_1d(queue, 0, m_count); + } + +private: + size_t m_count; +}; + +} //end detail namespace + +/// +/// \brief Includes algorithm +/// +/// Finds if the sorted range [first1, last1) includes the sorted +/// range [first2, last2). In other words, it checks if [first1, last1) is +/// a superset of [first2, last2). +/// +/// \return True, if [first1, last1) includes [first2, last2). False otherwise. 
+/// +/// \param first1 Iterator pointing to start of first set +/// \param last1 Iterator pointing to end of first set +/// \param first2 Iterator pointing to start of second set +/// \param last2 Iterator pointing to end of second set +/// \param queue Queue on which to execute +/// +template<class InputIterator1, class InputIterator2> +inline bool includes(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + command_queue &queue = system::default_queue()) +{ + size_t tile_size = 1024; + + size_t count1 = detail::iterator_range_size(first1, last1); + size_t count2 = detail::iterator_range_size(first2, last2); + + vector<uint_> tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); + vector<uint_> tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); + + // Tile the sets + detail::balanced_path_kernel tiling_kernel; + tiling_kernel.tile_size = static_cast<unsigned int>(tile_size); + tiling_kernel.set_range(first1, last1, first2, last2, + tile_a.begin()+1, tile_b.begin()+1); + fill_n(tile_a.begin(), 1, uint_(0), queue); + fill_n(tile_b.begin(), 1, uint_(0), queue); + tiling_kernel.exec(queue); + + fill_n(tile_a.end()-1, 1, static_cast<uint_>(count1), queue); + fill_n(tile_b.end()-1, 1, static_cast<uint_>(count2), queue); + + vector<uint_> result((count1+count2+tile_size-1)/tile_size, queue.get_context()); + + // Find individually + detail::serial_includes_kernel includes_kernel; + includes_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(), + tile_b.begin(), result.begin()); + + includes_kernel.exec(queue); + + return find(result.begin(), result.end(), 0, queue) == result.end(); +} + +} //end compute namespace +} //end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_SET_UNION_HPP diff --git a/boost/compute/algorithm/inclusive_scan.hpp b/boost/compute/algorithm/inclusive_scan.hpp new file mode 100644 index 0000000000..9f98beaf7c --- /dev/null +++ 
b/boost/compute/algorithm/inclusive_scan.hpp @@ -0,0 +1,81 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_INCLUSIVE_SCAN_HPP +#define BOOST_COMPUTE_ALGORITHM_INCLUSIVE_SCAN_HPP + +#include <boost/compute/functional.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/detail/scan.hpp> + +namespace boost { +namespace compute { + +/// Performs an inclusive scan of the elements in the range [\p first, \p last) +/// and stores the results in the range beginning at \p result. +/// +/// Each element in the output is assigned to the sum of the current value in +/// the input with the sum of every previous value in the input. +/// +/// \param first first element in the range to scan +/// \param last last element in the range to scan +/// \param result first element in the result range +/// \param binary_op associative binary operator +/// \param queue command queue to perform the operation +/// +/// \return \c OutputIterator to the end of the result range +/// +/// The default operation is to add the elements up. +/// +/// \snippet test/test_scan.cpp inclusive_scan_int +/// +/// But different associative operation can be specified as \p binary_op +/// instead (e.g., multiplication, maximum, minimum). 
+/// +/// \snippet test/test_scan.cpp inclusive_scan_int_multiplies +/// +/// \see exclusive_scan() +template<class InputIterator, class OutputIterator, class BinaryOperator> +inline OutputIterator +inclusive_scan(InputIterator first, + InputIterator last, + OutputIterator result, + BinaryOperator binary_op, + command_queue &queue = system::default_queue()) +{ + typedef typename + std::iterator_traits<OutputIterator>::value_type output_type; + + return detail::scan(first, last, result, false, + output_type(0), binary_op, + queue); +} + +/// \overload +template<class InputIterator, class OutputIterator> +inline OutputIterator +inclusive_scan(InputIterator first, + InputIterator last, + OutputIterator result, + command_queue &queue = system::default_queue()) +{ + typedef typename + std::iterator_traits<OutputIterator>::value_type output_type; + + return detail::scan(first, last, result, false, + output_type(0), boost::compute::plus<output_type>(), + queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_INCLUSIVE_SCAN_HPP diff --git a/boost/compute/algorithm/inner_product.hpp b/boost/compute/algorithm/inner_product.hpp new file mode 100644 index 0000000000..614611f91e --- /dev/null +++ b/boost/compute/algorithm/inner_product.hpp @@ -0,0 +1,93 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_INNER_PRODUCT_HPP +#define BOOST_COMPUTE_ALGORITHM_INNER_PRODUCT_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/functional.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/accumulate.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/iterator/transform_iterator.hpp> +#include <boost/compute/iterator/zip_iterator.hpp> +#include <boost/compute/functional/detail/unpack.hpp> + +namespace boost { +namespace compute { + +/// Returns the inner product of the elements in the range +/// [\p first1, \p last1) with the elements in the range beginning +/// at \p first2. +template<class InputIterator1, class InputIterator2, class T> +inline T inner_product(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + T init, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator1>::value_type input_type; + + ptrdiff_t n = std::distance(first1, last1); + + return ::boost::compute::accumulate( + ::boost::compute::make_transform_iterator( + ::boost::compute::make_zip_iterator( + boost::make_tuple(first1, first2) + ), + detail::unpack(multiplies<input_type>()) + ), + ::boost::compute::make_transform_iterator( + ::boost::compute::make_zip_iterator( + boost::make_tuple(last1, first2 + n) + ), + detail::unpack(multiplies<input_type>()) + ), + init, + queue + ); +} + +/// \overload +template<class InputIterator1, + class InputIterator2, + class T, + class BinaryAccumulateFunction, + class BinaryTransformFunction> +inline T inner_product(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + T init, + BinaryAccumulateFunction accumulate_function, + BinaryTransformFunction transform_function, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator1>::value_type 
value_type; + + size_t count = detail::iterator_range_size(first1, last1); + vector<value_type> result(count, queue.get_context()); + transform(first1, + last1, + first2, + result.begin(), + transform_function, + queue); + + return ::boost::compute::accumulate(result.begin(), + result.end(), + init, + accumulate_function, + queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_INNER_PRODUCT_HPP diff --git a/boost/compute/algorithm/inplace_merge.hpp b/boost/compute/algorithm/inplace_merge.hpp new file mode 100644 index 0000000000..3080950df5 --- /dev/null +++ b/boost/compute/algorithm/inplace_merge.hpp @@ -0,0 +1,60 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_INPLACE_MERGE_HPP +#define BOOST_COMPUTE_ALGORITHM_INPLACE_MERGE_HPP + +#include <iterator> + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/merge.hpp> +#include <boost/compute/container/vector.hpp> + +namespace boost { +namespace compute { + +/// Merges the sorted values in the range [\p first, \p middle) with +/// the sorted values in the range [\p middle, \p last) in-place. 
+template<class Iterator> +inline void inplace_merge(Iterator first, + Iterator middle, + Iterator last, + command_queue &queue = system::default_queue()) +{ + BOOST_ASSERT(first < middle && middle < last); + + typedef typename std::iterator_traits<Iterator>::value_type T; + + const context &context = queue.get_context(); + + ptrdiff_t left_size = std::distance(first, middle); + ptrdiff_t right_size = std::distance(middle, last); + + vector<T> left(left_size, context); + vector<T> right(right_size, context); + + copy(first, middle, left.begin(), queue); + copy(middle, last, right.begin(), queue); + + ::boost::compute::merge( + left.begin(), + left.end(), + right.begin(), + right.end(), + first, + queue + ); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_INPLACE_MERGE_HPP diff --git a/boost/compute/algorithm/iota.hpp b/boost/compute/algorithm/iota.hpp new file mode 100644 index 0000000000..084c3d8d97 --- /dev/null +++ b/boost/compute/algorithm/iota.hpp @@ -0,0 +1,48 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_IOTA_HPP +#define BOOST_COMPUTE_ALGORITHM_IOTA_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/iterator/counting_iterator.hpp> + +namespace boost { +namespace compute { + +/// Fills the range [\p first, \p last) with sequential values starting at +/// \p value. 
+/// +/// For example, the following code: +/// \snippet test/test_iota.cpp iota +/// +/// Will fill \c vec with the values (\c 0, \c 1, \c 2, \c ...). +template<class BufferIterator, class T> +inline void iota(BufferIterator first, + BufferIterator last, + const T &value, + command_queue &queue = system::default_queue()) +{ + T count = static_cast<T>(detail::iterator_range_size(first, last)); + + copy( + ::boost::compute::make_counting_iterator(value), + ::boost::compute::make_counting_iterator(value + count), + first, + queue + ); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_IOTA_HPP diff --git a/boost/compute/algorithm/is_partitioned.hpp b/boost/compute/algorithm/is_partitioned.hpp new file mode 100644 index 0000000000..3916825057 --- /dev/null +++ b/boost/compute/algorithm/is_partitioned.hpp @@ -0,0 +1,43 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_IS_PARTITIONED_HPP +#define BOOST_COMPUTE_ALGORITHM_IS_PARTITIONED_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/find_if.hpp> +#include <boost/compute/algorithm/find_if_not.hpp> + +namespace boost { +namespace compute { + +/// Returns \c true if the values in the range [\p first, \p last) +/// are partitioned according to \p predicate. 
+template<class InputIterator, class UnaryPredicate> +inline bool is_partitioned(InputIterator first, + InputIterator last, + UnaryPredicate predicate, + command_queue &queue = system::default_queue()) +{ + return ::boost::compute::find_if( + ::boost::compute::find_if_not(first, + last, + predicate, + queue), + last, + predicate, + queue) == last; +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_PARTITION_HPP diff --git a/boost/compute/algorithm/is_permutation.hpp b/boost/compute/algorithm/is_permutation.hpp new file mode 100644 index 0000000000..1e502efb37 --- /dev/null +++ b/boost/compute/algorithm/is_permutation.hpp @@ -0,0 +1,67 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_IS_PERMUTATION_HPP +#define BOOST_COMPUTE_ALGORITHM_IS_PERMUTATION_HPP + +#include <iterator> + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/algorithm/equal.hpp> +#include <boost/compute/algorithm/sort.hpp> + +namespace boost { +namespace compute { + +/// +/// \brief Permutation checking algorithm +/// +/// Checks if the range [first1, last1) can be permuted into the +/// range [first2, last2) +/// \return True, if it can be permuted. False, otherwise. 
+/// +/// \param first1 Iterator pointing to start of first range +/// \param last1 Iterator pointing to end of first range +/// \param first2 Iterator pointing to start of second range +/// \param last2 Iterator pointing to end of second range +/// \param queue Queue on which to execute +/// +template<class InputIterator1, class InputIterator2> +inline bool is_permutation(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator1>::value_type value_type1; + typedef typename std::iterator_traits<InputIterator2>::value_type value_type2; + + size_t count1 = detail::iterator_range_size(first1, last1); + size_t count2 = detail::iterator_range_size(first2, last2); + + if(count1 != count2) return false; + + vector<value_type1> temp1(first1, last1, queue); + vector<value_type2> temp2(first2, last2, queue); + + sort(temp1.begin(), temp1.end(), queue); + sort(temp2.begin(), temp2.end(), queue); + + return equal(temp1.begin(), temp1.end(), + temp2.begin(), queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_IS_PERMUTATION_HPP diff --git a/boost/compute/algorithm/is_sorted.hpp b/boost/compute/algorithm/is_sorted.hpp new file mode 100644 index 0000000000..a605159ac3 --- /dev/null +++ b/boost/compute/algorithm/is_sorted.hpp @@ -0,0 +1,64 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_IS_SORTED_HPP +#define BOOST_COMPUTE_ALGORITHM_IS_SORTED_HPP + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/functional/bind.hpp> +#include <boost/compute/functional/operator.hpp> +#include <boost/compute/algorithm/adjacent_find.hpp> + +namespace boost { +namespace compute { + +/// Returns \c true if the values in the range [\p first, \p last) +/// are in sorted order. +/// +/// \param first first element in the range to check +/// \param last last element in the range to check +/// \param compare comparison function (by default \c less) +/// \param queue command queue to perform the operation +/// +/// \return \c true if the range [\p first, \p last) is sorted +/// +/// \see sort() +template<class InputIterator, class Compare> +inline bool is_sorted(InputIterator first, + InputIterator last, + Compare compare, + command_queue &queue = system::default_queue()) +{ + using ::boost::compute::placeholders::_1; + using ::boost::compute::placeholders::_2; + + return ::boost::compute::adjacent_find( + first, last, ::boost::compute::bind(compare, _2, _1), queue + ) == last; +} + +/// \overload +template<class InputIterator> +inline bool is_sorted(InputIterator first, + InputIterator last, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator>::value_type value_type; + + return ::boost::compute::is_sorted( + first, last, ::boost::compute::less<value_type>(), queue + ); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_IS_SORTED_HPP diff --git a/boost/compute/algorithm/lexicographical_compare.hpp b/boost/compute/algorithm/lexicographical_compare.hpp new file mode 100644 index 0000000000..c4f7120807 --- /dev/null +++ b/boost/compute/algorithm/lexicographical_compare.hpp @@ -0,0 +1,117 @@ 
+//---------------------------------------------------------------------------// +// Copyright (c) 2014 Mageswaran.D <mageswaran1989@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#include <boost/compute/system.hpp> +#include <boost/compute/context.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/any_of.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/utility/program_cache.hpp> + +namespace boost { +namespace compute { + +namespace detail { + +const char lexicographical_compare_source[] = +"__kernel void lexicographical_compare(const uint size1,\n" +" const uint size2,\n" +" __global const T1 *range1,\n" +" __global const T2 *range2,\n" +" __global bool *result_buf)\n" +"{\n" +" const uint i = get_global_id(0);\n" +" if((i != size1) && (i != size2)){\n" + //Individual elements are compared and results are stored in parallel. 
+ //0 is true +" if(range1[i] < range2[i])\n" +" result_buf[i] = 0;\n" +" else\n" +" result_buf[i] = 1;\n" +" }\n" +" else\n" +" result_buf[i] = !((i == size1) && (i != size2));\n" +"}\n"; + +template<class InputIterator1, class InputIterator2> +inline bool dispatch_lexicographical_compare(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + command_queue &queue) +{ + const boost::compute::context &context = queue.get_context(); + + boost::shared_ptr<program_cache> cache = + program_cache::get_global_cache(context); + + size_t iterator_size1 = iterator_range_size(first1, last1); + size_t iterator_size2 = iterator_range_size(first2, last2); + size_t max_size = (std::max)(iterator_size1, iterator_size2); + + if(max_size == 0){ + return false; + } + + boost::compute::vector<bool> result_vector(max_size, context); + + + typedef typename std::iterator_traits<InputIterator1>::value_type value_type1; + typedef typename std::iterator_traits<InputIterator2>::value_type value_type2; + + // load (or create) lexicographical compare program + std::string cache_key = + std::string("__boost_lexicographical_compare") + + type_name<value_type1>() + type_name<value_type2>(); + + std::stringstream options; + options << " -DT1=" << type_name<value_type1>(); + options << " -DT2=" << type_name<value_type2>(); + + program lexicographical_compare_program = cache->get_or_build( + cache_key, options.str(), lexicographical_compare_source, context + ); + + kernel lexicographical_compare_kernel(lexicographical_compare_program, + "lexicographical_compare"); + + lexicographical_compare_kernel.set_arg<uint_>(0, iterator_size1); + lexicographical_compare_kernel.set_arg<uint_>(1, iterator_size2); + lexicographical_compare_kernel.set_arg(2, first1.get_buffer()); + lexicographical_compare_kernel.set_arg(3, first2.get_buffer()); + lexicographical_compare_kernel.set_arg(4, result_vector.get_buffer()); + + 
queue.enqueue_1d_range_kernel(lexicographical_compare_kernel, + 0, + max_size, + 0); + + return boost::compute::any_of(result_vector.begin(), + result_vector.end(), + _1 == 0, + queue); +} + +} // end detail namespace + +/// Checks if the first range [first1, last1) is lexicographically +/// less than the second range [first2, last2). +template<class InputIterator1, class InputIterator2> +inline bool lexicographical_compare(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + command_queue &queue = system::default_queue()) +{ + return detail::dispatch_lexicographical_compare(first1, last1, first2, last2, queue); +} + +} // end compute namespace +} // end boost namespac diff --git a/boost/compute/algorithm/lower_bound.hpp b/boost/compute/algorithm/lower_bound.hpp new file mode 100644 index 0000000000..b2011c66ef --- /dev/null +++ b/boost/compute/algorithm/lower_bound.hpp @@ -0,0 +1,44 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_LOWER_BOUND_HPP +#define BOOST_COMPUTE_ALGORITHM_LOWER_BOUND_HPP + +#include <boost/compute/lambda.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/detail/binary_find.hpp> + +namespace boost { +namespace compute { + +/// Returns an iterator pointing to the first element in the sorted +/// range [\p first, \p last) that is not less than \p value. 
+/// +/// \see upper_bound() +template<class InputIterator, class T> +inline InputIterator +lower_bound(InputIterator first, + InputIterator last, + const T &value, + command_queue &queue = system::default_queue()) +{ + using ::boost::compute::_1; + + InputIterator position = + detail::binary_find(first, last, _1 >= value, queue); + + return position; +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_LOWER_BOUND_HPP diff --git a/boost/compute/algorithm/max_element.hpp b/boost/compute/algorithm/max_element.hpp new file mode 100644 index 0000000000..55f2f7ffbf --- /dev/null +++ b/boost/compute/algorithm/max_element.hpp @@ -0,0 +1,74 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_MAX_ELEMENT_HPP +#define BOOST_COMPUTE_ALGORITHM_MAX_ELEMENT_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/functional.hpp> +#include <boost/compute/algorithm/detail/find_extrema.hpp> + +namespace boost { +namespace compute { + +/// Returns an iterator pointing to the element in the range +/// [\p first, \p last) with the maximum value. +/// +/// \param first first element in the input range +/// \param last last element in the input range +/// \param compare comparison function object which returns true if the first +/// argument is less than (i.e. is ordered before) the second. 
+/// \param queue command queue to perform the operation +/// +/// For example, to find \c int2 value with maximum first component in given vector: +/// \code +/// // comparison function object +/// BOOST_COMPUTE_FUNCTION(bool, compare_first, (const int2_ &a, const int2_ &b), +/// { +/// return a.x < b.x; +/// }); +/// +/// // create vector +/// boost::compute::vector<uint2_> data = ... +/// +/// boost::compute::vector<uint2_>::iterator max = +/// boost::compute::max_element(data.begin(), data.end(), compare_first, queue); +/// \endcode +/// +/// \see min_element() +template<class InputIterator, class Compare> +inline InputIterator +max_element(InputIterator first, + InputIterator last, + Compare compare, + command_queue &queue = system::default_queue()) +{ + return detail::find_extrema(first, last, compare, false, queue); +} + +///\overload +template<class InputIterator> +inline InputIterator +max_element(InputIterator first, + InputIterator last, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator>::value_type value_type; + + return ::boost::compute::max_element( + first, last, ::boost::compute::less<value_type>(), queue + ); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_MAX_ELEMENT_HPP diff --git a/boost/compute/algorithm/merge.hpp b/boost/compute/algorithm/merge.hpp new file mode 100644 index 0000000000..875a283044 --- /dev/null +++ b/boost/compute/algorithm/merge.hpp @@ -0,0 +1,105 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_MERGE_HPP +#define BOOST_COMPUTE_ALGORITHM_MERGE_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/algorithm/detail/merge_with_merge_path.hpp> +#include <boost/compute/algorithm/detail/serial_merge.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/detail/parameter_cache.hpp> + +namespace boost { +namespace compute { + +/// Merges the sorted values in the range [\p first1, \p last1) with the sorted +/// values in the range [\p first2, last2) and stores the result in the range +/// beginning at \p result. Values are compared using the \p comp function. If +/// no comparision function is given, \c less is used. +/// +/// \param first1 first element in the first range to merge +/// \param last1 last element in the first range to merge +/// \param first2 first element in the second range to merge +/// \param last2 last element in the second range to merge +/// \param result first element in the result range +/// \param comp comparison function (by default \c less) +/// \param queue command queue to perform the operation +/// +/// \return \c OutputIterator to the end of the result range +/// +/// \see inplace_merge() +template<class InputIterator1, + class InputIterator2, + class OutputIterator, + class Compare> +inline OutputIterator merge(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + Compare comp, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator1>::value_type input1_type; + typedef typename std::iterator_traits<InputIterator2>::value_type input2_type; + typedef typename std::iterator_traits<OutputIterator>::value_type output_type; + + const device &device = queue.get_device(); + + 
std::string cache_key = + std::string("__boost_merge_") + type_name<input1_type>() + "_" + + type_name<input2_type>() + "_" + type_name<output_type>(); + boost::shared_ptr<detail::parameter_cache> parameters = + detail::parameter_cache::get_global_cache(device); + + // default serial merge threshold depends on device type + size_t default_serial_merge_threshold = 32768; + if(device.type() & device::gpu) { + default_serial_merge_threshold = 2048; + } + + // loading serial merge threshold parameter + const size_t serial_merge_threshold = + parameters->get(cache_key, "serial_merge_threshold", + static_cast<uint_>(default_serial_merge_threshold)); + + // choosing merge algorithm + const size_t total_count = + detail::iterator_range_size(first1, last1) + + detail::iterator_range_size(first2, last2); + // for small inputs serial merge turns out to outperform + // merge with merge path algorithm + if(total_count <= serial_merge_threshold){ + return detail::serial_merge(first1, last1, first2, last2, result, comp, queue); + } + return detail::merge_with_merge_path(first1, last1, first2, last2, result, comp, queue); +} + +/// \overload +template<class InputIterator1, class InputIterator2, class OutputIterator> +inline OutputIterator merge(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator1>::value_type value_type; + less<value_type> less_than; + return merge(first1, last1, first2, last2, result, less_than, queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_MERGE_HPP diff --git a/boost/compute/algorithm/min_element.hpp b/boost/compute/algorithm/min_element.hpp new file mode 100644 index 0000000000..62744efb98 --- /dev/null +++ b/boost/compute/algorithm/min_element.hpp @@ -0,0 +1,74 @@ 
+//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_MIN_ELEMENT_HPP +#define BOOST_COMPUTE_ALGORITHM_MIN_ELEMENT_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/functional.hpp> +#include <boost/compute/algorithm/detail/find_extrema.hpp> + +namespace boost { +namespace compute { + +/// Returns an iterator pointing to the element in range +/// [\p first, \p last) with the minimum value. +/// +/// \param first first element in the input range +/// \param last last element in the input range +/// \param compare comparison function object which returns true if the first +/// argument is less than (i.e. is ordered before) the second. +/// \param queue command queue to perform the operation +/// +/// For example, to find \c int2 value with minimum first component in given vector: +/// \code +/// // comparison function object +/// BOOST_COMPUTE_FUNCTION(bool, compare_first, (const int2_ &a, const int2_ &b), +/// { +/// return a.x < b.x; +/// }); +/// +/// // create vector +/// boost::compute::vector<uint2_> data = ... 
+/// +/// boost::compute::vector<uint2_>::iterator min = +/// boost::compute::min_element(data.begin(), data.end(), compare_first, queue); +/// \endcode +/// +/// \see max_element() +template<class InputIterator, class Compare> +inline InputIterator +min_element(InputIterator first, + InputIterator last, + Compare compare, + command_queue &queue = system::default_queue()) +{ + return detail::find_extrema(first, last, compare, true, queue); +} + +///\overload +template<class InputIterator> +inline InputIterator +min_element(InputIterator first, + InputIterator last, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator>::value_type value_type; + + return ::boost::compute::min_element( + first, last, ::boost::compute::less<value_type>(), queue + ); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_MIN_ELEMENT_HPP diff --git a/boost/compute/algorithm/minmax_element.hpp b/boost/compute/algorithm/minmax_element.hpp new file mode 100644 index 0000000000..bf32c3c989 --- /dev/null +++ b/boost/compute/algorithm/minmax_element.hpp @@ -0,0 +1,70 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_MINMAX_ELEMENT_HPP +#define BOOST_COMPUTE_ALGORITHM_MINMAX_ELEMENT_HPP + +#include <utility> + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/max_element.hpp> +#include <boost/compute/algorithm/min_element.hpp> + +namespace boost { +namespace compute { + +/// Returns a pair of iterators with the first pointing to the minimum +/// element and the second pointing to the maximum element in the range +/// [\p first, \p last). +/// +/// \param first first element in the input range +/// \param last last element in the input range +/// \param compare comparison function object which returns true if the first +/// argument is less than (i.e. is ordered before) the second. +/// \param queue command queue to perform the operation +/// +/// \see max_element(), min_element() +template<class InputIterator, class Compare> +inline std::pair<InputIterator, InputIterator> +minmax_element(InputIterator first, + InputIterator last, + Compare compare, + command_queue &queue = system::default_queue()) +{ + if(first == last){ + // empty range + return std::make_pair(first, first); + } + + return std::make_pair(min_element(first, last, compare, queue), + max_element(first, last, compare, queue)); +} + +///\overload +template<class InputIterator, class Compare> +inline std::pair<InputIterator, InputIterator> +minmax_element(InputIterator first, + InputIterator last, + command_queue &queue = system::default_queue()) +{ + if(first == last){ + // empty range + return std::make_pair(first, first); + } + + return std::make_pair(min_element(first, last, queue), + max_element(first, last, queue)); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_MINMAX_ELEMENT_HPP diff --git a/boost/compute/algorithm/mismatch.hpp b/boost/compute/algorithm/mismatch.hpp new file mode 
100644 index 0000000000..e7db883004 --- /dev/null +++ b/boost/compute/algorithm/mismatch.hpp @@ -0,0 +1,89 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_MISMATCH_HPP +#define BOOST_COMPUTE_ALGORITHM_MISMATCH_HPP + +#include <iterator> +#include <utility> + +#include <boost/compute/system.hpp> +#include <boost/compute/functional.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/find.hpp> +#include <boost/compute/iterator/transform_iterator.hpp> +#include <boost/compute/iterator/zip_iterator.hpp> +#include <boost/compute/functional/detail/unpack.hpp> + +namespace boost { +namespace compute { + +/// Returns a pair of iterators pointing to the first position where the +/// range [\p first1, \p last1) and the range starting at \p first2 +/// differ. 
+template<class InputIterator1, class InputIterator2> +inline std::pair<InputIterator1, InputIterator2> +mismatch(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator1>::value_type value_type; + + ::boost::compute::equal_to<value_type> op; + + InputIterator2 last2 = first2 + std::distance(first1, last1); + + InputIterator1 iter = + boost::get<0>( + ::boost::compute::find( + ::boost::compute::make_transform_iterator( + ::boost::compute::make_zip_iterator( + boost::make_tuple(first1, first2) + ), + detail::unpack(op) + ), + ::boost::compute::make_transform_iterator( + ::boost::compute::make_zip_iterator( + boost::make_tuple(last1, last2) + ), + detail::unpack(op) + ), + false, + queue + ).base().get_iterator_tuple() + ); + + return std::make_pair(iter, first2 + std::distance(first1, iter)); +} + +/// \overload +template<class InputIterator1, class InputIterator2> +inline std::pair<InputIterator1, InputIterator2> +mismatch(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + command_queue &queue = system::default_queue()) +{ + if(std::distance(first1, last1) < std::distance(first2, last2)){ + return ::boost::compute::mismatch(first1, last1, first2, queue); + } + else { + return ::boost::compute::mismatch( + first1, first1 + std::distance(first2, last2), first2, queue + ); + } +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_MISMATCH_HPP diff --git a/boost/compute/algorithm/next_permutation.hpp b/boost/compute/algorithm/next_permutation.hpp new file mode 100644 index 0000000000..e81fbd2ee8 --- /dev/null +++ b/boost/compute/algorithm/next_permutation.hpp @@ -0,0 +1,170 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software 
// License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_NEXT_PERMUTATION_HPP
#define BOOST_COMPUTE_ALGORITHM_NEXT_PERMUTATION_HPP

#include <iterator>

#include <boost/compute/system.hpp>
#include <boost/compute/command_queue.hpp>
#include <boost/compute/container/detail/scalar.hpp>
#include <boost/compute/algorithm/reverse.hpp>

namespace boost {
namespace compute {
namespace detail {

///
/// \brief Helper function for next_permutation
///
/// To find rightmost element which is smaller
/// than its next element
///
/// \return Iterator to that element, or \p last when the range has fewer
///         than two elements or no such element exists
///
template<class InputIterator>
inline InputIterator next_permutation_helper(InputIterator first,
                                             InputIterator last,
                                             command_queue &queue)
{
    typedef typename std::iterator_traits<InputIterator>::value_type value_type;

    size_t count = detail::iterator_range_size(first, last);
    if(count == 0 || count == 1){
        return last;
    }
    // one work-item per adjacent pair (i, i+1), hence one fewer than elements
    count = count - 1;
    const context &context = queue.get_context();

    detail::meta_kernel k("next_permutation");
    size_t index_arg = k.add_arg<int *>(memory_object::global_memory, "index");
    atomic_max<int_> atomic_max_int;

    // every work-item compares its element with the following one and, if it
    // is smaller, proposes its own position; atomic_max keeps the largest
    // (rightmost) proposal
    k << k.decl<const int_>("i") << " = get_global_id(0);\n"
      << k.decl<const value_type>("cur_value") << "="
      <<     first[k.var<const int_>("i")] << ";\n"
      << k.decl<const value_type>("next_value") << "="
      <<     first[k.expr<const int_>("i+1")] << ";\n"
      << "if(cur_value < next_value){\n"
      << " " << atomic_max_int(k.var<int_ *>("index"), k.var<int_>("i")) << ";\n"
      << "}\n";

    kernel kernel = k.compile(context);

    scalar<int_> index(context);
    kernel.set_arg(index_arg, index.get_buffer());

    // -1 is the "nothing found" sentinel; any real position overrides it
    index.write(static_cast<int_>(-1), queue);

    queue.enqueue_1d_range_kernel(kernel, 0, count, 0);

    int result = static_cast<int>(index.read(queue));
    if(result == -1) return last;
    else return first + result;
}

///
/// \brief Helper function for next_permutation
///
/// To find the smallest element to the right of the element found above
/// that is greater than it
///
/// \param first start of the range to the right of the element found above
/// \param last end of that range
/// \param value value of the element found above
/// \param queue Queue on which to execute
///
template<class InputIterator, class ValueType>
inline InputIterator np_ceiling(InputIterator first,
                                InputIterator last,
                                ValueType value,
                                command_queue &queue)
{
    typedef typename std::iterator_traits<InputIterator>::value_type value_type;

    size_t count = detail::iterator_range_size(first, last);
    if(count == 0){
        return last;
    }
    const context &context = queue.get_context();

    detail::meta_kernel k("np_ceiling");
    size_t index_arg = k.add_arg<int *>(memory_object::global_memory, "index");
    size_t value_arg = k.add_arg<value_type>(memory_object::private_memory, "value");
    atomic_max<int_> atomic_max_int;

    // a work-item proposes its position when its element is greater than
    // `value` and not greater than the element at the currently recorded
    // position; atomic_max keeps the rightmost proposal
    // NOTE(review): `*index` is read without synchronization while other
    // work-items may update it - presumably harmless because the caller passes
    // a non-increasing suffix; confirm.
    k << k.decl<const int_>("i") << " = get_global_id(0);\n"
      << k.decl<const value_type>("cur_value") << "="
      <<     first[k.var<const int_>("i")] << ";\n"
      << "if(cur_value <= " << first[k.expr<int_>("*index")]
      << "      && cur_value > value){\n"
      << " " << atomic_max_int(k.var<int_ *>("index"), k.var<int_>("i")) << ";\n"
      << "}\n";

    kernel kernel = k.compile(context);

    scalar<int_> index(context);
    kernel.set_arg(index_arg, index.get_buffer());

    // start at position 0, which is also the result if no work-item proposes
    index.write(static_cast<int_>(0), queue);

    kernel.set_arg(value_arg, value);

    queue.enqueue_1d_range_kernel(kernel, 0, count, 0);

    int result = static_cast<int>(index.read(queue));
    return first + result;
}

} // end detail namespace

///
/// \brief Permutation generating algorithm
///
/// Transforms the range [first, last) into the next permutation from the
/// set of all permutations arranged in lexicographic order
/// \return \c true if the range was advanced to the next permutation;
///         \c false if the range is empty, or if it already held the last
///         permutation and was reset by reversing it
///
/// \param first Iterator pointing to start of range
/// \param last Iterator
pointing to end of range +/// \param queue Queue on which to execute +/// +template<class InputIterator> +inline bool next_permutation(InputIterator first, + InputIterator last, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator>::value_type value_type; + + if(first == last) return false; + + InputIterator first_element = + detail::next_permutation_helper(first, last, queue); + + if(first_element == last) + { + reverse(first, last, queue); + return false; + } + + value_type first_value = first_element.read(queue); + + InputIterator ceiling_element = + detail::np_ceiling(first_element + 1, last, first_value, queue); + + value_type ceiling_value = ceiling_element.read(queue); + + first_element.write(ceiling_value, queue); + ceiling_element.write(first_value, queue); + + reverse(first_element + 1, last, queue); + + return true; +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_NEXT_PERMUTATION_HPP diff --git a/boost/compute/algorithm/none_of.hpp b/boost/compute/algorithm/none_of.hpp new file mode 100644 index 0000000000..c25dd12a87 --- /dev/null +++ b/boost/compute/algorithm/none_of.hpp @@ -0,0 +1,36 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_NONE_OF_HPP +#define BOOST_COMPUTE_ALGORITHM_NONE_OF_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/algorithm/find_if.hpp> + +namespace boost { +namespace compute { + +/// Returns \c true if \p predicate returns \c true for none of the elements in +/// the range [\p first, \p last). +/// +/// \see all_of(), any_of() +template<class InputIterator, class UnaryPredicate> +inline bool none_of(InputIterator first, + InputIterator last, + UnaryPredicate predicate, + command_queue &queue = system::default_queue()) +{ + return ::boost::compute::find_if(first, last, predicate, queue) == last; +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_NONE_OF_HPP diff --git a/boost/compute/algorithm/nth_element.hpp b/boost/compute/algorithm/nth_element.hpp new file mode 100644 index 0000000000..68f7a3dbc0 --- /dev/null +++ b/boost/compute/algorithm/nth_element.hpp @@ -0,0 +1,87 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_NTH_ELEMENT_HPP +#define BOOST_COMPUTE_ALGORITHM_NTH_ELEMENT_HPP + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/fill_n.hpp> +#include <boost/compute/algorithm/find.hpp> +#include <boost/compute/algorithm/partition.hpp> +#include <boost/compute/algorithm/sort.hpp> +#include <boost/compute/functional/bind.hpp> + +namespace boost { +namespace compute { + +/// Rearranges the elements in the range [\p first, \p last) such that +/// the \p nth element would be in that position in a sorted sequence. +template<class Iterator, class Compare> +inline void nth_element(Iterator first, + Iterator nth, + Iterator last, + Compare compare, + command_queue &queue = system::default_queue()) +{ + if(nth == last) return; + + typedef typename std::iterator_traits<Iterator>::value_type value_type; + + while(1) + { + value_type value = nth.read(queue); + + using boost::compute::placeholders::_1; + Iterator new_nth = partition( + first, last, ::boost::compute::bind(compare, _1, value), queue + ); + + Iterator old_nth = find(new_nth, last, value, queue); + + value_type new_value = new_nth.read(queue); + + fill_n(new_nth, 1, value, queue); + fill_n(old_nth, 1, new_value, queue); + + new_value = nth.read(queue); + + if(value == new_value) break; + + if(std::distance(first, nth) < std::distance(first, new_nth)) + { + last = new_nth; + } + else + { + first = new_nth; + } + } +} + +/// \overload +template<class Iterator> +inline void nth_element(Iterator first, + Iterator nth, + Iterator last, + command_queue &queue = system::default_queue()) +{ + if(nth == last) return; + + typedef typename std::iterator_traits<Iterator>::value_type value_type; + + less<value_type> less_than; + + return nth_element(first, nth, last, less_than, queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_NTH_ELEMENT_HPP 
diff --git a/boost/compute/algorithm/partial_sum.hpp b/boost/compute/algorithm/partial_sum.hpp new file mode 100644 index 0000000000..d440369a5a --- /dev/null +++ b/boost/compute/algorithm/partial_sum.hpp @@ -0,0 +1,37 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_PARTIAL_SUM_HPP +#define BOOST_COMPUTE_ALGORITHM_PARTIAL_SUM_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/inclusive_scan.hpp> + +namespace boost { +namespace compute { + +/// Calculates the cumulative sum of the elements in the range [\p first, +/// \p last) and writes the resulting values to the range beginning at +/// \p result. 
+template<class InputIterator, class OutputIterator> +inline OutputIterator +partial_sum(InputIterator first, + InputIterator last, + OutputIterator result, + command_queue &queue = system::default_queue()) +{ + return ::boost::compute::inclusive_scan(first, last, result, queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_PARTIAL_SUM_HPP diff --git a/boost/compute/algorithm/partition.hpp b/boost/compute/algorithm/partition.hpp new file mode 100644 index 0000000000..7860350e0d --- /dev/null +++ b/boost/compute/algorithm/partition.hpp @@ -0,0 +1,39 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_PARTITION_HPP +#define BOOST_COMPUTE_ALGORITHM_PARTITION_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/stable_partition.hpp> + +namespace boost { +namespace compute { + +/// +/// Partitions the elements in the range [\p first, \p last) according to +/// \p predicate. Order of the elements need not be preserved. 
+/// +/// \see is_partitioned() and stable_partition() +/// +template<class Iterator, class UnaryPredicate> +inline Iterator partition(Iterator first, + Iterator last, + UnaryPredicate predicate, + command_queue &queue = system::default_queue()) +{ + return stable_partition(first, last, predicate, queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_PARTITION_HPP diff --git a/boost/compute/algorithm/partition_copy.hpp b/boost/compute/algorithm/partition_copy.hpp new file mode 100644 index 0000000000..80a2c6475f --- /dev/null +++ b/boost/compute/algorithm/partition_copy.hpp @@ -0,0 +1,63 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_PARTITION_COPY_HPP +#define BOOST_COMPUTE_ALGORITHM_PARTITION_COPY_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/functional.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/copy_if.hpp> + +namespace boost { +namespace compute { + +/// Copies all of the elements in the range [\p first, \p last) for which +/// \p predicate returns \c true to the range beginning at \p first_true +/// and all of the elements for which \p predicate returns \c false to +/// the range beginning at \p first_false. 
+/// +/// \see partition() +template<class InputIterator, + class OutputIterator1, + class OutputIterator2, + class UnaryPredicate> +inline std::pair<OutputIterator1, OutputIterator2> +partition_copy(InputIterator first, + InputIterator last, + OutputIterator1 first_true, + OutputIterator2 first_false, + UnaryPredicate predicate, + command_queue &queue = system::default_queue()) +{ + // copy true values + OutputIterator1 last_true = + ::boost::compute::copy_if(first, + last, + first_true, + predicate, + queue); + + // copy false values + OutputIterator2 last_false = + ::boost::compute::copy_if(first, + last, + first_false, + not1(predicate), + queue); + + // return iterators to the end of the true and the false ranges + return std::make_pair(last_true, last_false); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_PARTITION_COPY_HPP diff --git a/boost/compute/algorithm/partition_point.hpp b/boost/compute/algorithm/partition_point.hpp new file mode 100644 index 0000000000..3cc2bc0ca6 --- /dev/null +++ b/boost/compute/algorithm/partition_point.hpp @@ -0,0 +1,46 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_PARTITION_POINT_HPP +#define BOOST_COMPUTE_ALGORITHM_PARTITION_POINT_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/detail/binary_find.hpp> + +namespace boost { +namespace compute { + +/// +/// \brief Partition point algorithm +/// +/// Finds the end of true values in the partitioned range [first, last) +/// \return Iterator pointing to end of true values +/// +/// \param first Iterator pointing to start of range +/// \param last Iterator pointing to end of range +/// \param predicate Unary predicate to be applied on each element +/// \param queue Queue on which to execute +/// +/// \see partition() and stable_partition() +/// +template<class InputIterator, class UnaryPredicate> +inline InputIterator partition_point(InputIterator first, + InputIterator last, + UnaryPredicate predicate, + command_queue &queue = system::default_queue()) +{ + return detail::binary_find(first, last, not1(predicate), queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_PARTITION_POINT_HPP diff --git a/boost/compute/algorithm/prev_permutation.hpp b/boost/compute/algorithm/prev_permutation.hpp new file mode 100644 index 0000000000..03c01bf8f4 --- /dev/null +++ b/boost/compute/algorithm/prev_permutation.hpp @@ -0,0 +1,170 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_ALGORITHM_PREV_PERMUTATION_HPP
#define BOOST_COMPUTE_ALGORITHM_PREV_PERMUTATION_HPP

#include <iterator>

#include <boost/compute/system.hpp>
#include <boost/compute/command_queue.hpp>
#include <boost/compute/container/detail/scalar.hpp>
#include <boost/compute/algorithm/reverse.hpp>

namespace boost {
namespace compute {
namespace detail {

///
/// \brief Helper function for prev_permutation
///
/// To find rightmost element which is greater
/// than its next element
///
/// \return Iterator to that element, or \p last when the range has fewer
///         than two elements or no such element exists
///
template<class InputIterator>
inline InputIterator prev_permutation_helper(InputIterator first,
                                             InputIterator last,
                                             command_queue &queue)
{
    typedef typename std::iterator_traits<InputIterator>::value_type value_type;

    size_t count = detail::iterator_range_size(first, last);
    if(count == 0 || count == 1){
        return last;
    }
    // one work-item per adjacent pair (i, i+1), hence one fewer than elements
    count = count - 1;
    const context &context = queue.get_context();

    detail::meta_kernel k("prev_permutation");
    size_t index_arg = k.add_arg<int *>(memory_object::global_memory, "index");
    atomic_max<int_> atomic_max_int;

    // every work-item compares its element with the following one and, if it
    // is greater, proposes its own position; atomic_max keeps the largest
    // (rightmost) proposal
    k << k.decl<const int_>("i") << " = get_global_id(0);\n"
      << k.decl<const value_type>("cur_value") << "="
      <<     first[k.var<const int_>("i")] << ";\n"
      << k.decl<const value_type>("next_value") << "="
      <<     first[k.expr<const int_>("i+1")] << ";\n"
      << "if(cur_value > next_value){\n"
      << " " << atomic_max_int(k.var<int_ *>("index"), k.var<int_>("i")) << ";\n"
      << "}\n";

    kernel kernel = k.compile(context);

    scalar<int_> index(context);
    kernel.set_arg(index_arg, index.get_buffer());

    // -1 is the "nothing found" sentinel; any real position overrides it
    index.write(static_cast<int_>(-1), queue);

    queue.enqueue_1d_range_kernel(kernel, 0, count, 0);

    int result = static_cast<int>(index.read(queue));
    if(result == -1) return last;
    else return first + result;
}

///
/// \brief Helper function for prev_permutation
///
/// To find the largest element to the right
/// of the element found above
/// that is smaller than it
///
/// \param first start of the range to the right of the element found above
/// \param last end of that range
/// \param value value of the element found above
/// \param queue Queue on which to execute
///
template<class InputIterator, class ValueType>
inline InputIterator pp_floor(InputIterator first,
                              InputIterator last,
                              ValueType value,
                              command_queue &queue)
{
    typedef typename std::iterator_traits<InputIterator>::value_type value_type;

    size_t count = detail::iterator_range_size(first, last);
    if(count == 0){
        return last;
    }
    const context &context = queue.get_context();

    detail::meta_kernel k("pp_floor");
    size_t index_arg = k.add_arg<int *>(memory_object::global_memory, "index");
    size_t value_arg = k.add_arg<value_type>(memory_object::private_memory, "value");
    atomic_max<int_> atomic_max_int;

    // a work-item proposes its position when its element is smaller than
    // `value` and not smaller than the element at the currently recorded
    // position; atomic_max keeps the rightmost proposal
    // NOTE(review): `*index` is read without synchronization while other
    // work-items may update it - presumably harmless because the caller passes
    // a non-decreasing suffix; confirm.
    k << k.decl<const int_>("i") << " = get_global_id(0);\n"
      << k.decl<const value_type>("cur_value") << "="
      <<     first[k.var<const int_>("i")] << ";\n"
      << "if(cur_value >= " << first[k.expr<int_>("*index")]
      << "      && cur_value < value){\n"
      << " " << atomic_max_int(k.var<int_ *>("index"), k.var<int_>("i")) << ";\n"
      << "}\n";

    kernel kernel = k.compile(context);

    scalar<int_> index(context);
    kernel.set_arg(index_arg, index.get_buffer());

    // start at position 0, which is also the result if no work-item proposes
    index.write(static_cast<int_>(0), queue);

    kernel.set_arg(value_arg, value);

    queue.enqueue_1d_range_kernel(kernel, 0, count, 0);

    int result = static_cast<int>(index.read(queue));
    return first + result;
}

} // end detail namespace

///
/// \brief Permutation generating algorithm
///
/// Transforms the range [first, last) into the previous permutation from
/// the set of all permutations arranged in lexicographic order
/// \return \c true if the range was moved to the previous permutation;
///         \c false if the range is empty, or if it already held the first
///         permutation and was reset by reversing it
///
/// \param first Iterator pointing to start of range
/// \param last Iterator pointing to end of range
/// \param queue Queue on which to execute
///
template<class InputIterator>
inline bool prev_permutation(InputIterator first,
                             InputIterator last,
                             command_queue &queue = system::default_queue())
{
    typedef typename std::iterator_traits<InputIterator>::value_type value_type;

    if(first == last) return false;

    // rightmost position whose element is greater than its successor
    InputIterator first_element =
        detail::prev_permutation_helper(first, last, queue);

    // no such position: wrap around by reversing the whole range
    if(first_element == last)
    {
        reverse(first, last, queue);
        return false;
    }

    value_type first_value = first_element.read(queue);

    // despite the "ceiling" name, this is the element chosen by pp_floor()
    InputIterator ceiling_element =
        detail::pp_floor(first_element + 1, last, first_value, queue);

    value_type ceiling_value = ceiling_element.read(queue);

    // swap the two elements on the device
    first_element.write(ceiling_value, queue);
    ceiling_element.write(first_value, queue);

    // reverse everything after the swapped position
    reverse(first_element + 1, last, queue);

    return true;
}

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_ALGORITHM_PREV_PERMUTATION_HPP
diff --git a/boost/compute/algorithm/random_shuffle.hpp b/boost/compute/algorithm/random_shuffle.hpp
new file mode 100644
index 0000000000..7d2d46a133
--- /dev/null
+++ b/boost/compute/algorithm/random_shuffle.hpp
@@ -0,0 +1,75 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_RANDOM_SHUFFLE_HPP +#define BOOST_COMPUTE_ALGORITHM_RANDOM_SHUFFLE_HPP + +#include <vector> +#include <algorithm> + +#include <boost/range/algorithm_ext/iota.hpp> + +#include <boost/compute/system.hpp> +#include <boost/compute/functional.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/algorithm/scatter.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> + +namespace boost { +namespace compute { + +/// Randomly shuffles the elements in the range [\p first, \p last). +/// +/// \see scatter() +template<class Iterator> +inline void random_shuffle(Iterator first, + Iterator last, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<Iterator>::value_type value_type; + + size_t count = detail::iterator_range_size(first, last); + if(count == 0){ + return; + } + + // generate shuffled indices on the host + std::vector<cl_uint> random_indices(count); + boost::iota(random_indices, 0); + std::random_shuffle(random_indices.begin(), random_indices.end()); + + // copy random indices to the device + const context &context = queue.get_context(); + vector<cl_uint> indices(count, context); + ::boost::compute::copy(random_indices.begin(), + random_indices.end(), + indices.begin(), + queue); + + // make a copy of the values on the device + vector<value_type> tmp(count, context); + ::boost::compute::copy(first, + last, + tmp.begin(), + queue); + + // write values to their new locations + ::boost::compute::scatter(tmp.begin(), + tmp.end(), + indices.begin(), + first, + queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_RANDOM_SHUFFLE_HPP diff --git a/boost/compute/algorithm/reduce.hpp b/boost/compute/algorithm/reduce.hpp new file mode 100644 index 0000000000..79624a0e50 --- /dev/null +++ 
b/boost/compute/algorithm/reduce.hpp @@ -0,0 +1,301 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_REDUCE_HPP +#define BOOST_COMPUTE_ALGORITHM_REDUCE_HPP + +#include <iterator> + +#include <boost/compute/system.hpp> +#include <boost/compute/functional.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/container/array.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/algorithm/copy_n.hpp> +#include <boost/compute/algorithm/detail/inplace_reduce.hpp> +#include <boost/compute/algorithm/detail/reduce_on_gpu.hpp> +#include <boost/compute/algorithm/detail/serial_reduce.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/memory/local_buffer.hpp> +#include <boost/compute/type_traits/result_of.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class InputIterator, class OutputIterator, class BinaryFunction> +size_t reduce(InputIterator first, + size_t count, + OutputIterator result, + size_t block_size, + BinaryFunction function, + command_queue &queue) +{ + typedef typename + std::iterator_traits<InputIterator>::value_type + input_type; + typedef typename + boost::compute::result_of<BinaryFunction(input_type, input_type)>::type + result_type; + + const context &context = queue.get_context(); + size_t block_count = count / 2 / block_size; + size_t total_block_count = + static_cast<size_t>(std::ceil(float(count) / 2.f / float(block_size))); + + if(block_count != 0){ + meta_kernel 
k("block_reduce"); + size_t output_arg = k.add_arg<result_type *>(memory_object::global_memory, "output"); + size_t block_arg = k.add_arg<input_type *>(memory_object::local_memory, "block"); + + k << + "const uint gid = get_global_id(0);\n" << + "const uint lid = get_local_id(0);\n" << + + // copy values to local memory + "block[lid] = " << + function(first[k.make_var<uint_>("gid*2+0")], + first[k.make_var<uint_>("gid*2+1")]) << ";\n" << + + // perform reduction + "for(uint i = 1; i < " << uint_(block_size) << "; i <<= 1){\n" << + " barrier(CLK_LOCAL_MEM_FENCE);\n" << + " uint mask = (i << 1) - 1;\n" << + " if((lid & mask) == 0){\n" << + " block[lid] = " << + function(k.expr<input_type>("block[lid]"), + k.expr<input_type>("block[lid+i]")) << ";\n" << + " }\n" << + "}\n" << + + // write block result to global output + "if(lid == 0)\n" << + " output[get_group_id(0)] = block[0];\n"; + + kernel kernel = k.compile(context); + kernel.set_arg(output_arg, result.get_buffer()); + kernel.set_arg(block_arg, local_buffer<input_type>(block_size)); + + queue.enqueue_1d_range_kernel(kernel, + 0, + block_count * block_size, + block_size); + } + + // serially reduce any leftovers + if(block_count * block_size * 2 < count){ + size_t last_block_start = block_count * block_size * 2; + + meta_kernel k("extra_serial_reduce"); + size_t count_arg = k.add_arg<uint_>("count"); + size_t offset_arg = k.add_arg<uint_>("offset"); + size_t output_arg = k.add_arg<result_type *>(memory_object::global_memory, "output"); + size_t output_offset_arg = k.add_arg<uint_>("output_offset"); + + k << + k.decl<result_type>("result") << " = \n" << + first[k.expr<uint_>("offset")] << ";\n" << + "for(uint i = offset + 1; i < count; i++)\n" << + " result = " << + function(k.var<result_type>("result"), + first[k.var<uint_>("i")]) << ";\n" << + "output[output_offset] = result;\n"; + + kernel kernel = k.compile(context); + kernel.set_arg(count_arg, static_cast<uint_>(count)); + kernel.set_arg(offset_arg, 
static_cast<uint_>(last_block_start)); + kernel.set_arg(output_arg, result.get_buffer()); + kernel.set_arg(output_offset_arg, static_cast<uint_>(block_count)); + + queue.enqueue_task(kernel); + } + + return total_block_count; +} + +template<class InputIterator, class BinaryFunction> +inline vector< + typename boost::compute::result_of< + BinaryFunction( + typename std::iterator_traits<InputIterator>::value_type, + typename std::iterator_traits<InputIterator>::value_type + ) + >::type +> +block_reduce(InputIterator first, + size_t count, + size_t block_size, + BinaryFunction function, + command_queue &queue) +{ + typedef typename + std::iterator_traits<InputIterator>::value_type + input_type; + typedef typename + boost::compute::result_of<BinaryFunction(input_type, input_type)>::type + result_type; + + const context &context = queue.get_context(); + size_t total_block_count = + static_cast<size_t>(std::ceil(float(count) / 2.f / float(block_size))); + vector<result_type> result_vector(total_block_count, context); + + reduce(first, count, result_vector.begin(), block_size, function, queue); + + return result_vector; +} + +template<class InputIterator, class OutputIterator, class BinaryFunction> +inline void generic_reduce(InputIterator first, + InputIterator last, + OutputIterator result, + BinaryFunction function, + command_queue &queue) +{ + typedef typename + std::iterator_traits<InputIterator>::value_type + input_type; + typedef typename + boost::compute::result_of<BinaryFunction(input_type, input_type)>::type + result_type; + + const device &device = queue.get_device(); + const context &context = queue.get_context(); + + size_t count = detail::iterator_range_size(first, last); + + if(device.type() & device::cpu){ + boost::compute::vector<result_type> value(1, context); + detail::serial_reduce(first, last, value.begin(), function, queue); + boost::compute::copy_n(value.begin(), 1, result, queue); + } + else { + size_t block_size = 256; + + // first pass + 
vector<result_type> results = detail::block_reduce(first, + count, + block_size, + function, + queue); + + if(results.size() > 1){ + detail::inplace_reduce(results.begin(), + results.end(), + function, + queue); + } + + boost::compute::copy_n(results.begin(), 1, result, queue); + } +} + +template<class InputIterator, class OutputIterator, class T> +inline void dispatch_reduce(InputIterator first, + InputIterator last, + OutputIterator result, + const plus<T> &function, + command_queue &queue) +{ + const context &context = queue.get_context(); + const device &device = queue.get_device(); + + // reduce to temporary buffer on device + array<T, 1> tmp(context); + if(device.type() & device::cpu){ + detail::serial_reduce(first, last, tmp.begin(), function, queue); + } + else { + reduce_on_gpu(first, last, tmp.begin(), function, queue); + } + + // copy to result iterator + copy_n(tmp.begin(), 1, result, queue); +} + +template<class InputIterator, class OutputIterator, class BinaryFunction> +inline void dispatch_reduce(InputIterator first, + InputIterator last, + OutputIterator result, + BinaryFunction function, + command_queue &queue) +{ + generic_reduce(first, last, result, function, queue); +} + +} // end detail namespace + +/// Returns the result of applying \p function to the elements in the +/// range [\p first, \p last). +/// +/// If no function is specified, \c plus will be used. +/// +/// \param first first element in the input range +/// \param last last element in the input range +/// \param result iterator pointing to the output +/// \param function binary reduction function +/// \param queue command queue to perform the operation +/// +/// The \c reduce() algorithm assumes that the binary reduction function is +/// associative. When used with non-associative functions the result may +/// be non-deterministic and vary in precision. 
Notably this affects the +/// \c plus<float>() function as floating-point addition is not associative +/// and may produce slightly different results than a serial algorithm. +/// +/// This algorithm supports both host and device iterators for the +/// result argument. This allows for values to be reduced and copied +/// to the host all with a single function call. +/// +/// For example, to calculate the sum of the values in a device vector and +/// copy the result to a value on the host: +/// +/// \snippet test/test_reduce.cpp sum_int +/// +/// Note that while the the \c reduce() algorithm is conceptually identical to +/// the \c accumulate() algorithm, its implementation is substantially more +/// efficient on parallel hardware. For more information, see the documentation +/// on the \c accumulate() algorithm. +/// +/// \see accumulate() +template<class InputIterator, class OutputIterator, class BinaryFunction> +inline void reduce(InputIterator first, + InputIterator last, + OutputIterator result, + BinaryFunction function, + command_queue &queue = system::default_queue()) +{ + if(first == last){ + return; + } + + detail::dispatch_reduce(first, last, result, function, queue); +} + +/// \overload +template<class InputIterator, class OutputIterator> +inline void reduce(InputIterator first, + InputIterator last, + OutputIterator result, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator>::value_type T; + + if(first == last){ + return; + } + + detail::dispatch_reduce(first, last, result, plus<T>(), queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_REDUCE_HPP diff --git a/boost/compute/algorithm/reduce_by_key.hpp b/boost/compute/algorithm/reduce_by_key.hpp new file mode 100644 index 0000000000..87c73e887f --- /dev/null +++ b/boost/compute/algorithm/reduce_by_key.hpp @@ -0,0 +1,118 @@ 
+//---------------------------------------------------------------------------// +// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_REDUCE_BY_KEY_HPP +#define BOOST_COMPUTE_ALGORITHM_REDUCE_BY_KEY_HPP + +#include <iterator> +#include <utility> + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/device.hpp> +#include <boost/compute/functional.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/algorithm/detail/reduce_by_key.hpp> + +namespace boost { +namespace compute { + +/// The \c reduce_by_key() algorithm performs reduction for each contiguous +/// subsequence of values determined by equivalent keys. +/// +/// Returns a pair of iterators at the end of the ranges [\p keys_result, \p keys_result_last) +/// and [\p values_result, \p values_result_last). +/// +/// If no function is specified, \c plus will be used. +/// If no predicate is specified, \c equal_to will be used. +/// +/// \param keys_first the first key +/// \param keys_last the last key +/// \param values_first the first input value +/// \param keys_result iterator pointing to the key output +/// \param values_result iterator pointing to the reduced value output +/// \param function binary reduction function +/// \param predicate binary predicate which returns true only if two keys are equal +/// \param queue command queue to perform the operation +/// +/// The \c reduce_by_key() algorithm assumes that the binary reduction function +/// is associative. When used with non-associative functions the result may +/// be non-deterministic and vary in precision. 
Notably this affects the +/// \c plus<float>() function as floating-point addition is not associative +/// and may produce slightly different results than a serial algorithm. +/// +/// For example, to calculate the sum of the values for each key: +/// +/// \snippet test/test_reduce_by_key.cpp reduce_by_key_int +/// +/// \see reduce() +template<class InputKeyIterator, class InputValueIterator, + class OutputKeyIterator, class OutputValueIterator, + class BinaryFunction, class BinaryPredicate> +inline std::pair<OutputKeyIterator, OutputValueIterator> +reduce_by_key(InputKeyIterator keys_first, + InputKeyIterator keys_last, + InputValueIterator values_first, + OutputKeyIterator keys_result, + OutputValueIterator values_result, + BinaryFunction function, + BinaryPredicate predicate, + command_queue &queue = system::default_queue()) +{ + return detail::dispatch_reduce_by_key(keys_first, keys_last, values_first, + keys_result, values_result, + function, predicate, + queue); +} + +/// \overload +template<class InputKeyIterator, class InputValueIterator, + class OutputKeyIterator, class OutputValueIterator, + class BinaryFunction> +inline std::pair<OutputKeyIterator, OutputValueIterator> +reduce_by_key(InputKeyIterator keys_first, + InputKeyIterator keys_last, + InputValueIterator values_first, + OutputKeyIterator keys_result, + OutputValueIterator values_result, + BinaryFunction function, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputKeyIterator>::value_type key_type; + + return reduce_by_key(keys_first, keys_last, values_first, + keys_result, values_result, + function, equal_to<key_type>(), + queue); +} + +/// \overload +template<class InputKeyIterator, class InputValueIterator, + class OutputKeyIterator, class OutputValueIterator> +inline std::pair<OutputKeyIterator, OutputValueIterator> +reduce_by_key(InputKeyIterator keys_first, + InputKeyIterator keys_last, + InputValueIterator values_first, + OutputKeyIterator 
keys_result, + OutputValueIterator values_result, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputKeyIterator>::value_type key_type; + typedef typename std::iterator_traits<InputValueIterator>::value_type value_type; + + return reduce_by_key(keys_first, keys_last, values_first, + keys_result, values_result, + plus<value_type>(), equal_to<key_type>(), + queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_REDUCE_BY_KEY_HPP diff --git a/boost/compute/algorithm/remove.hpp b/boost/compute/algorithm/remove.hpp new file mode 100644 index 0000000000..98feb1f9d8 --- /dev/null +++ b/boost/compute/algorithm/remove.hpp @@ -0,0 +1,54 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_REMOVE_HPP +#define BOOST_COMPUTE_ALGORITHM_REMOVE_HPP + +#include <boost/compute/lambda.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/algorithm/remove_if.hpp> +#include <boost/compute/type_traits/vector_size.hpp> + +namespace boost { +namespace compute { + +/// Removes each element equal to \p value in the range [\p first, +/// \p last). 
+/// +/// \see remove_if() +template<class Iterator, class T> +inline Iterator remove(Iterator first, + Iterator last, + const T &value, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<Iterator>::value_type value_type; + + using ::boost::compute::_1; + using ::boost::compute::lambda::all; + + if(vector_size<value_type>::value == 1){ + return ::boost::compute::remove_if(first, + last, + _1 == value, + queue); + } + else { + return ::boost::compute::remove_if(first, + last, + all(_1 == value), + queue); + } +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_REMOVE_HPP diff --git a/boost/compute/algorithm/remove_if.hpp b/boost/compute/algorithm/remove_if.hpp new file mode 100644 index 0000000000..5e416bef88 --- /dev/null +++ b/boost/compute/algorithm/remove_if.hpp @@ -0,0 +1,47 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_REMOVE_IF_HPP +#define BOOST_COMPUTE_ALGORITHM_REMOVE_IF_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/algorithm/copy_if.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/functional/logical.hpp> + +namespace boost { +namespace compute { + +/// Removes each element for which \p predicate returns \c true in the +/// range [\p first, \p last). 
+/// +/// \see remove() +template<class Iterator, class Predicate> +inline Iterator remove_if(Iterator first, + Iterator last, + Predicate predicate, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<Iterator>::value_type value_type; + + // temporary storage for the input data + ::boost::compute::vector<value_type> tmp(first, last, queue); + + return ::boost::compute::copy_if(tmp.begin(), + tmp.end(), + first, + not1(predicate), + queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_REMOVE_IF_HPP diff --git a/boost/compute/algorithm/replace.hpp b/boost/compute/algorithm/replace.hpp new file mode 100644 index 0000000000..fd649a2fad --- /dev/null +++ b/boost/compute/algorithm/replace.hpp @@ -0,0 +1,90 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_REPLACE_HPP +#define BOOST_COMPUTE_ALGORITHM_REPLACE_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class Iterator, class T> +class replace_kernel : public meta_kernel +{ +public: + replace_kernel() + : meta_kernel("replace") + { + m_count = 0; + } + + void set_range(Iterator first, Iterator last) + { + m_count = detail::iterator_range_size(first, last); + + *this << + "const uint i = get_global_id(0);\n" << + "if(" << first[var<cl_uint>("i")] << " == " << var<T>("old_value") << ")\n" << + " " << first[var<cl_uint>("i")] << '=' << var<T>("new_value") << ";\n"; + } + + void set_old_value(const T &old_value) + { + add_set_arg<T>("old_value", old_value); + } + + void set_new_value(const T &new_value) + { + add_set_arg<T>("new_value", new_value); + } + + void exec(command_queue &queue) + { + if(m_count == 0){ + // nothing to do + return; + } + + exec_1d(queue, 0, m_count); + } + +private: + size_t m_count; +}; + +} // end detail namespace + +/// Replaces each instance of \p old_value in the range [\p first, +/// \p last) with \p new_value. 
+template<class Iterator, class T> +inline void replace(Iterator first, + Iterator last, + const T &old_value, + const T &new_value, + command_queue &queue = system::default_queue()) +{ + detail::replace_kernel<Iterator, T> kernel; + + kernel.set_range(first, last); + kernel.set_old_value(old_value); + kernel.set_new_value(new_value); + + kernel.exec(queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_REPLACE_HPP diff --git a/boost/compute/algorithm/replace_copy.hpp b/boost/compute/algorithm/replace_copy.hpp new file mode 100644 index 0000000000..7224bd3ae6 --- /dev/null +++ b/boost/compute/algorithm/replace_copy.hpp @@ -0,0 +1,62 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_REPLACE_COPY_HPP +#define BOOST_COMPUTE_ALGORITHM_REPLACE_COPY_HPP + +#include <iterator> + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/algorithm/replace.hpp> + +namespace boost { +namespace compute { + +/// Copies the value in the range [\p first, \p last) to the range +/// beginning at \p result while replacing each instance of \p old_value +/// with \p new_value. 
+/// +/// \see replace() +template<class InputIterator, class OutputIterator, class T> +inline OutputIterator +replace_copy(InputIterator first, + InputIterator last, + OutputIterator result, + const T &old_value, + const T &new_value, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<OutputIterator>::difference_type difference_type; + + difference_type count = std::distance(first, last); + if(count == 0){ + return result; + } + + // copy data to result + ::boost::compute::copy(first, last, result, queue); + + // replace in result + ::boost::compute::replace(result, + result + count, + old_value, + new_value, + queue); + + // return iterator to the end of result + return result + count; +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_REPLACE_COPY_HPP diff --git a/boost/compute/algorithm/reverse.hpp b/boost/compute/algorithm/reverse.hpp new file mode 100644 index 0000000000..b6a9e8098c --- /dev/null +++ b/boost/compute/algorithm/reverse.hpp @@ -0,0 +1,74 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_REVERSE_HPP +#define BOOST_COMPUTE_ALGORITHM_REVERSE_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class Iterator> +struct reverse_kernel : public meta_kernel +{ + reverse_kernel(Iterator first, Iterator last) + : meta_kernel("reverse") + { + typedef typename std::iterator_traits<Iterator>::value_type value_type; + + // store size of the range + m_size = detail::iterator_range_size(first, last); + add_set_arg<const cl_uint>("size", static_cast<const cl_uint>(m_size)); + + *this << + decl<cl_uint>("i") << " = get_global_id(0);\n" << + decl<cl_uint>("j") << " = size - get_global_id(0) - 1;\n" << + decl<value_type>("tmp") << "=" << first[var<cl_uint>("i")] << ";\n" << + first[var<cl_uint>("i")] << "=" << first[var<cl_uint>("j")] << ";\n" << + first[var<cl_uint>("j")] << "= tmp;\n"; + } + + void exec(command_queue &queue) + { + exec_1d(queue, 0, m_size / 2); + } + + size_t m_size; +}; + +} // end detail namespace + +/// Reverses the elements in the range [\p first, \p last). 
+/// +/// \see reverse_copy() +template<class Iterator> +inline void reverse(Iterator first, + Iterator last, + command_queue &queue = system::default_queue()) +{ + size_t count = detail::iterator_range_size(first, last); + if(count < 2){ + return; + } + + detail::reverse_kernel<Iterator> kernel(first, last); + + kernel.exec(queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_REVERSE_HPP diff --git a/boost/compute/algorithm/reverse_copy.hpp b/boost/compute/algorithm/reverse_copy.hpp new file mode 100644 index 0000000000..c839f44651 --- /dev/null +++ b/boost/compute/algorithm/reverse_copy.hpp @@ -0,0 +1,79 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_REVERSE_COPY_HPP +#define BOOST_COMPUTE_ALGORITHM_REVERSE_COPY_HPP + +#include <iterator> + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/algorithm/reverse.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class Iterator, class OutputIterator> +struct reverse_copy_kernel : public meta_kernel +{ + reverse_copy_kernel(Iterator first, Iterator last, OutputIterator result) + : meta_kernel("reverse_copy") + { + // store size of the range + m_size = detail::iterator_range_size(first, last); + add_set_arg<const cl_uint>("size", static_cast<const cl_uint>(m_size)); + + *this << + decl<cl_uint>("i") << " = get_global_id(0);\n" << + decl<cl_uint>("j") << " = size - get_global_id(0) - 1;\n" << + result[var<cl_uint>("j")] << "=" << first[var<cl_uint>("i")] << ";\n"; + } + + void exec(command_queue &queue) + { + exec_1d(queue, 0, m_size); + } + + size_t m_size; +}; + +} // end detail namespace + +/// Copies the elements in the range [\p first, \p last) in reversed +/// order to the range beginning at \p result. 
+/// +/// \return iterator to the end of the output range, i.e. \p result advanced +/// by the number of elements in [\p first, \p last); \p result itself +/// when the input range is empty +/// +/// \see reverse() +template<class InputIterator, class OutputIterator> +inline OutputIterator +reverse_copy(InputIterator first, + InputIterator last, + OutputIterator result, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<OutputIterator>::difference_type difference_type; + + difference_type count = std::distance(first, last); + + // nothing to copy: return before building the kernel, which would + // otherwise be enqueued with a global work size of zero (rejected by + // OpenCL implementations prior to 2.1) + if(count == 0){ + return result; + } + + detail::reverse_copy_kernel<InputIterator, OutputIterator> + kernel(first, last, result); + + // run kernel + kernel.exec(queue); + + // return iterator to the end of result + return result + count; +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_REVERSE_COPY_HPP diff --git a/boost/compute/algorithm/rotate.hpp b/boost/compute/algorithm/rotate.hpp new file mode 100644 index 0000000000..54cb073cc2 --- /dev/null +++ b/boost/compute/algorithm/rotate.hpp @@ -0,0 +1,54 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_ROTATE_HPP +#define BOOST_COMPUTE_ALGORITHM_ROTATE_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/container/vector.hpp> + +namespace boost { +namespace compute { + +/// Performs left rotation such that element at \p n_first comes to the +/// beginning. 
+/// +/// \see rotate_copy() +template<class InputIterator> +inline void rotate(InputIterator first, + InputIterator n_first, + InputIterator last, + command_queue &queue = system::default_queue()) +{ + //Handle trivial cases + if (n_first==first || n_first==last) + { + return; + } + + //Handle others + typedef typename std::iterator_traits<InputIterator>::value_type T; + + size_t count = detail::iterator_range_size(first, n_first); + size_t count2 = detail::iterator_range_size(first, last); + + const context &context = queue.get_context(); + vector<T> temp(count2, context); + ::boost::compute::copy(first, last, temp.begin(), queue); + + ::boost::compute::copy(temp.begin()+count, temp.end(), first, queue); + ::boost::compute::copy(temp.begin(), temp.begin()+count, last-count, queue); +} + +} //end compute namespace +} //end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_ROTATE_HPP diff --git a/boost/compute/algorithm/rotate_copy.hpp b/boost/compute/algorithm/rotate_copy.hpp new file mode 100644 index 0000000000..fa1b44c5e5 --- /dev/null +++ b/boost/compute/algorithm/rotate_copy.hpp @@ -0,0 +1,41 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_ROTATE_COPY_HPP +#define BOOST_COMPUTE_ALGORITHM_ROTATE_COPY_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/algorithm/copy.hpp> + +namespace boost { +namespace compute { + +/// Performs left rotation such that element at n_first comes to the +/// beginning and the output is stored in range starting at result. 
+/// +/// \see rotate() +template<class InputIterator, class OutputIterator> +inline void rotate_copy(InputIterator first, + InputIterator n_first, + InputIterator last, + OutputIterator result, + command_queue &queue = system::default_queue()) +{ + size_t count = detail::iterator_range_size(first, n_first); + size_t count2 = detail::iterator_range_size(n_first, last); + + ::boost::compute::copy(first+count, last, result, queue); + ::boost::compute::copy(first, first+count, result+count2, queue); +} + +} //end compute namespace +} //end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_ROTATE_COPY_HPP diff --git a/boost/compute/algorithm/scatter.hpp b/boost/compute/algorithm/scatter.hpp new file mode 100644 index 0000000000..bea4201628 --- /dev/null +++ b/boost/compute/algorithm/scatter.hpp @@ -0,0 +1,99 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_SCATTER_HPP +#define BOOST_COMPUTE_ALGORITHM_SCATTER_HPP + +#include <boost/algorithm/string/replace.hpp> + +#include <boost/compute/system.hpp> +#include <boost/compute/exception.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/iterator/buffer_iterator.hpp> +#include <boost/compute/type_traits/type_name.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/detail/meta_kernel.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class InputIterator, class MapIterator, class OutputIterator> +class scatter_kernel : meta_kernel +{ +public: + scatter_kernel() : meta_kernel("scatter") + {} + + void set_range(InputIterator first, + InputIterator last, + MapIterator map, + OutputIterator result) + { + m_count = iterator_range_size(first, last); + m_input_offset = first.get_index(); + m_output_offset = result.get_index(); + + m_input_offset_arg = add_arg<uint_>("input_offset"); + m_output_offset_arg = add_arg<uint_>("output_offset"); + + *this << + "const uint i = get_global_id(0);\n" << + "uint i1 = " << map[expr<uint_>("i")] << + " + output_offset;\n" << + "uint i2 = i + input_offset;\n" << + result[expr<uint_>("i1")] << "=" << + first[expr<uint_>("i2")] << ";\n"; + } + + event exec(command_queue &queue) + { + if(m_count == 0) { + return event(); + } + + set_arg(m_input_offset_arg, uint_(m_input_offset)); + set_arg(m_output_offset_arg, uint_(m_output_offset)); + + return exec_1d(queue, 0, m_count); + } + +private: + size_t m_count; + size_t m_input_offset; + size_t m_input_offset_arg; + size_t m_output_offset; + size_t m_output_offset_arg; +}; + +} // end detail namespace + +/// Copies the elements from the range [\p first, \p last) to the range +/// beginning at \p result using the output indices from the range beginning +/// at \p map. 
+/// +/// \see gather() +template<class InputIterator, class MapIterator, class OutputIterator> +inline void scatter(InputIterator first, + InputIterator last, + MapIterator map, + OutputIterator result, + command_queue &queue = system::default_queue()) +{ + detail::scatter_kernel<InputIterator, MapIterator, OutputIterator> kernel; + + kernel.set_range(first, last, map, result); + kernel.exec(queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_SCATTER_HPP diff --git a/boost/compute/algorithm/scatter_if.hpp b/boost/compute/algorithm/scatter_if.hpp new file mode 100644 index 0000000000..159edd8c86 --- /dev/null +++ b/boost/compute/algorithm/scatter_if.hpp @@ -0,0 +1,119 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2015 Jakub Pola <jakub.pola@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_SCATTER_IF_HPP +#define BOOST_COMPUTE_ALGORITHM_SCATTER_IF_HPP + +#include <boost/algorithm/string/replace.hpp> + +#include <boost/compute/system.hpp> +#include <boost/compute/exception.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/iterator/buffer_iterator.hpp> +#include <boost/compute/type_traits/type_name.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/detail/meta_kernel.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class InputIterator, class MapIterator, class StencilIterator, class OutputIterator, class Predicate> +class scatter_if_kernel : meta_kernel +{ +public: + scatter_if_kernel() : meta_kernel("scatter_if") + {} + + void set_range(InputIterator first, + InputIterator last, + MapIterator map, + StencilIterator stencil, + OutputIterator result, + Predicate predicate) + { + m_count = iterator_range_size(first, last); + m_input_offset = first.get_index(); + m_output_offset = result.get_index(); + + m_input_offset_arg = add_arg<uint_>("input_offset"); + m_output_offset_arg = add_arg<uint_>("output_offset"); + + *this << + "const uint i = get_global_id(0);\n" << + "uint i1 = " << map[expr<uint_>("i")] << + " + output_offset;\n" << + "uint i2 = i + input_offset;\n" << + if_(predicate(stencil[expr<uint_>("i")])) << "\n" << + result[expr<uint_>("i1")] << "=" << + first[expr<uint_>("i2")] << ";\n"; + } + + event exec(command_queue &queue) + { + if(m_count == 0) { + return event(); + } + + set_arg(m_input_offset_arg, uint_(m_input_offset)); + set_arg(m_output_offset_arg, uint_(m_output_offset)); + + return exec_1d(queue, 0, m_count); + } + +private: + size_t m_count; + size_t m_input_offset; + size_t m_input_offset_arg; + size_t m_output_offset; + size_t m_output_offset_arg; +}; + +} // end detail namespace + +/// Copies the elements from the range [\p 
first, \p last) to the range +/// beginning at \p result using the output indices from the range beginning +/// at \p map if stencil is resolved to true. By default the predicate is +/// an identity +/// +/// +template<class InputIterator, class MapIterator, class StencilIterator, class OutputIterator, + class Predicate> +inline void scatter_if(InputIterator first, + InputIterator last, + MapIterator map, + StencilIterator stencil, + OutputIterator result, + Predicate predicate, + command_queue &queue = system::default_queue()) +{ + detail::scatter_if_kernel<InputIterator, MapIterator, StencilIterator, OutputIterator, Predicate> kernel; + + kernel.set_range(first, last, map, stencil, result, predicate); + kernel.exec(queue); +} + +template<class InputIterator, class MapIterator, class StencilIterator, class OutputIterator> +inline void scatter_if(InputIterator first, + InputIterator last, + MapIterator map, + StencilIterator stencil, + OutputIterator result, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<StencilIterator>::value_type T; + + scatter_if(first, last, map, stencil, result, identity<T>(), queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_SCATTER_IF_HPP diff --git a/boost/compute/algorithm/search.hpp b/boost/compute/algorithm/search.hpp new file mode 100644 index 0000000000..3d3d035b3c --- /dev/null +++ b/boost/compute/algorithm/search.hpp @@ -0,0 +1,73 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_SEARCH_HPP +#define BOOST_COMPUTE_ALGORITHM_SEARCH_HPP + +#include <boost/compute/algorithm/detail/search_all.hpp> +#include <boost/compute/algorithm/find.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/system.hpp> + +namespace boost { +namespace compute { + +/// +/// \brief Substring matching algorithm +/// +/// Searches for the first match of the pattern [p_first, p_last) +/// in text [t_first, t_last). +/// \return Iterator pointing to beginning of first occurrence, or \p t_last +/// if the pattern does not occur in the text (which is always the +/// case when the pattern is longer than the text) +/// +/// \param t_first Iterator pointing to start of text +/// \param t_last Iterator pointing to end of text +/// \param p_first Iterator pointing to start of pattern +/// \param p_last Iterator pointing to end of pattern +/// \param queue Queue on which to execute +/// +template<class TextIterator, class PatternIterator> +inline TextIterator search(TextIterator t_first, + TextIterator t_last, + PatternIterator p_first, + PatternIterator p_last, + command_queue &queue = system::default_queue()) +{ + const size_t text_size = detail::iterator_range_size(t_first, t_last); + const size_t pattern_size = detail::iterator_range_size(p_first, p_last); + + // a pattern longer than the text can never match; return before the + // unsigned subtraction below wraps around to an enormous buffer size + if(pattern_size > text_size) + return t_last; + + // there is no need to check if pattern starts at last n - 1 indices + vector<uint_> matching_indices( + text_size - pattern_size + 1, + queue.get_context() + ); + + // search_kernel puts value 1 at every index in vector where pattern starts at + detail::search_kernel<PatternIterator, + TextIterator, + vector<uint_>::iterator> kernel; + + kernel.set_range(p_first, p_last, t_first, t_last, matching_indices.begin()); + kernel.exec(queue); + + vector<uint_>::iterator index = ::boost::compute::find( + matching_indices.begin(), matching_indices.end(), uint_(1), queue + ); + + // pattern was not found + if(index == matching_indices.end()) + return t_last; + + return t_first + 
detail::iterator_range_size(matching_indices.begin(), index); +} + +} //end compute namespace +} //end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_SEARCH_HPP diff --git a/boost/compute/algorithm/search_n.hpp b/boost/compute/algorithm/search_n.hpp new file mode 100644 index 0000000000..9e03111bb0 --- /dev/null +++ b/boost/compute/algorithm/search_n.hpp @@ -0,0 +1,140 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SEARCH_N_HPP +#define BOOST_COMPUTE_ALGORITHM_DETAIL_SEARCH_N_HPP + +#include <iterator> + +#include <boost/compute/algorithm/find.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/system.hpp> + +namespace boost { +namespace compute { +namespace detail { + +/// +/// \brief Search kernel class +/// +/// Subclass of meta_kernel which is capable of performing search_n +/// +template<class TextIterator, class OutputIterator> +class search_n_kernel : public meta_kernel +{ +public: + typedef typename std::iterator_traits<TextIterator>::value_type value_type; + + search_n_kernel() : meta_kernel("search_n") + {} + + void set_range(TextIterator t_first, + TextIterator t_last, + value_type value, + size_t n, + OutputIterator result) + { + m_n = n; + m_n_arg = add_arg<uint_>("n"); + + m_value = value; + m_value_arg = add_arg<value_type>("value"); + + m_count = iterator_range_size(t_first, t_last); + m_count = m_count + 1 - m_n; + + *this << + "uint i = get_global_id(0);\n" << + "uint i1 = 
i;\n" << + "uint j;\n" << + "for(j = 0; j<n; j++,i++)\n" << + "{\n" << + " if(value != " << t_first[expr<uint_>("i")] << ")\n" << + " j = n + 1;\n" << + "}\n" << + "if(j == n)\n" << + result[expr<uint_>("i1")] << " = 1;\n" << + "else\n" << + result[expr<uint_>("i1")] << " = 0;\n"; + } + + event exec(command_queue &queue) + { + if(m_count == 0) { + return event(); + } + + set_arg(m_n_arg, uint_(m_n)); + set_arg(m_value_arg, m_value); + + return exec_1d(queue, 0, m_count); + } + +private: + size_t m_n; + size_t m_n_arg; + size_t m_count; + value_type m_value; + size_t m_value_arg; +}; + +} //end detail namespace + +/// +/// \brief Substring matching algorithm +/// +/// Searches for the first occurrence of n consecutive occurrences of +/// value in text [t_first, t_last). +/// \return Iterator pointing to beginning of first occurrence +/// +/// \param t_first Iterator pointing to start of text +/// \param t_last Iterator pointing to end of text +/// \param n Number of times value repeats +/// \param value Value which repeats +/// \param queue Queue on which to execute +/// +template<class TextIterator, class ValueType> +inline TextIterator search_n(TextIterator t_first, + TextIterator t_last, + size_t n, + ValueType value, + command_queue &queue = system::default_queue()) +{ + // there is no need to check if pattern starts at last n - 1 indices + vector<uint_> matching_indices( + detail::iterator_range_size(t_first, t_last) + 1 - n, + queue.get_context() + ); + + // search_n_kernel puts value 1 at every index in vector where pattern + // of n values starts at + detail::search_n_kernel<TextIterator, + vector<uint_>::iterator> kernel; + + kernel.set_range(t_first, t_last, value, n, matching_indices.begin()); + kernel.exec(queue); + + vector<uint_>::iterator index = ::boost::compute::find( + matching_indices.begin(), matching_indices.end(), uint_(1), queue + ); + + // pattern was not found + if(index == matching_indices.end()) + return t_last; + + return t_first + 
detail::iterator_range_size(matching_indices.begin(), index); +} + +} //end compute namespace +} //end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SEARCH_N_HPP diff --git a/boost/compute/algorithm/set_difference.hpp b/boost/compute/algorithm/set_difference.hpp new file mode 100644 index 0000000000..17ce7bd3f6 --- /dev/null +++ b/boost/compute/algorithm/set_difference.hpp @@ -0,0 +1,182 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_SET_DIFFERENCE_HPP +#define BOOST_COMPUTE_ALGORITHM_SET_DIFFERENCE_HPP + +#include <iterator> + +#include <boost/compute/algorithm/detail/compact.hpp> +#include <boost/compute/algorithm/detail/balanced_path.hpp> +#include <boost/compute/algorithm/exclusive_scan.hpp> +#include <boost/compute/algorithm/fill_n.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/system.hpp> + +namespace boost { +namespace compute { +namespace detail { + +/// +/// \brief Serial set difference kernel class +/// +/// Subclass of meta_kernel to perform serial set difference after tiling +/// +class serial_set_difference_kernel : meta_kernel +{ +public: + unsigned int tile_size; + + serial_set_difference_kernel() : meta_kernel("set_difference") + { + tile_size = 4; + } + + template<class InputIterator1, class InputIterator2, + class InputIterator3, class InputIterator4, + class OutputIterator1, class OutputIterator2> + void set_range(InputIterator1 first1, + InputIterator2 first2, + 
InputIterator3 tile_first1, + InputIterator3 tile_last1, + InputIterator4 tile_first2, + OutputIterator1 result, + OutputIterator2 counts) + { + m_count = iterator_range_size(tile_first1, tile_last1) - 1; + + *this << + "uint i = get_global_id(0);\n" << + "uint start1 = " << tile_first1[expr<uint_>("i")] << ";\n" << + "uint end1 = " << tile_first1[expr<uint_>("i+1")] << ";\n" << + "uint start2 = " << tile_first2[expr<uint_>("i")] << ";\n" << + "uint end2 = " << tile_first2[expr<uint_>("i+1")] << ";\n" << + "uint index = i*" << tile_size << ";\n" << + "uint count = 0;\n" << + "while(start1<end1 && start2<end2)\n" << + "{\n" << + " if(" << first1[expr<uint_>("start1")] << " == " << + first2[expr<uint_>("start2")] << ")\n" << + " {\n" << + " start1++; start2++;\n" << + " }\n" << + " else if(" << first1[expr<uint_>("start1")] << " < " << + first2[expr<uint_>("start2")] << ")\n" << + " {\n" << + result[expr<uint_>("index")] << + " = " << first1[expr<uint_>("start1")] << ";\n" << + " index++; count++;\n" << + " start1++;\n" << + " }\n" << + " else\n" << + " {\n" << + " start2++;\n" << + " }\n" << + "}\n" << + "while(start1<end1)\n" << + "{\n" << + result[expr<uint_>("index")] << + " = " << first1[expr<uint_>("start1")] << ";\n" << + " index++; count++;\n" << + " start1++;\n" << + "}\n" << + counts[expr<uint_>("i")] << " = count;\n"; + } + + event exec(command_queue &queue) + { + if(m_count == 0) { + return event(); + } + + return exec_1d(queue, 0, m_count); + } + +private: + size_t m_count; +}; + +} //end detail namespace + +/// +/// \brief Set difference algorithm +/// +/// Finds the difference of the sorted range [first2, last2) from the sorted +/// range [first1, last1) and stores it in range starting at result +/// \return Iterator pointing to end of difference +/// +/// \param first1 Iterator pointing to start of first set +/// \param last1 Iterator pointing to end of first set +/// \param first2 Iterator pointing to start of second set +/// \param last2 Iterator 
pointing to end of second set +/// \param result Iterator pointing to start of range in which the difference +/// will be stored +/// \param queue Queue on which to execute +/// +template<class InputIterator1, class InputIterator2, class OutputIterator> +inline OutputIterator set_difference(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator1>::value_type value_type; + + int tile_size = 1024; + + int count1 = detail::iterator_range_size(first1, last1); + int count2 = detail::iterator_range_size(first2, last2); + + vector<uint_> tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); + vector<uint_> tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); + + // Tile the sets + detail::balanced_path_kernel tiling_kernel; + tiling_kernel.tile_size = tile_size; + tiling_kernel.set_range(first1, last1, first2, last2, + tile_a.begin()+1, tile_b.begin()+1); + fill_n(tile_a.begin(), 1, 0, queue); + fill_n(tile_b.begin(), 1, 0, queue); + tiling_kernel.exec(queue); + + fill_n(tile_a.end()-1, 1, count1, queue); + fill_n(tile_b.end()-1, 1, count2, queue); + + vector<value_type> temp_result(count1+count2, queue.get_context()); + vector<uint_> counts((count1+count2+tile_size-1)/tile_size + 1, queue.get_context()); + fill_n(counts.end()-1, 1, 0, queue); + + // Find individual differences + detail::serial_set_difference_kernel difference_kernel; + difference_kernel.tile_size = tile_size; + difference_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(), + tile_b.begin(), temp_result.begin(), counts.begin()); + + difference_kernel.exec(queue); + + exclusive_scan(counts.begin(), counts.end(), counts.begin(), queue); + + // Compact the results + detail::compact_kernel compact_kernel; + compact_kernel.tile_size = tile_size; + compact_kernel.set_range(temp_result.begin(), 
counts.begin(), counts.end(), result); + + compact_kernel.exec(queue); + + return result + (counts.end() - 1).read(queue); +} + +} //end compute namespace +} //end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_SET_DIFFERENCE_HPP diff --git a/boost/compute/algorithm/set_intersection.hpp b/boost/compute/algorithm/set_intersection.hpp new file mode 100644 index 0000000000..50f291e84a --- /dev/null +++ b/boost/compute/algorithm/set_intersection.hpp @@ -0,0 +1,170 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_SET_INTERSECTION_HPP +#define BOOST_COMPUTE_ALGORITHM_SET_INTERSECTION_HPP + +#include <iterator> + +#include <boost/compute/algorithm/detail/compact.hpp> +#include <boost/compute/algorithm/detail/balanced_path.hpp> +#include <boost/compute/algorithm/exclusive_scan.hpp> +#include <boost/compute/algorithm/fill_n.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/system.hpp> + +namespace boost { +namespace compute { +namespace detail { + +/// +/// \brief Serial set intersection kernel class +/// +/// Subclass of meta_kernel to perform serial set intersection after tiling +/// +class serial_set_intersection_kernel : meta_kernel +{ +public: + unsigned int tile_size; + + serial_set_intersection_kernel() : meta_kernel("set_intersection") + { + tile_size = 4; + } + + template<class InputIterator1, class InputIterator2, + class InputIterator3, class InputIterator4, + class OutputIterator1, class 
OutputIterator2> + void set_range(InputIterator1 first1, + InputIterator2 first2, + InputIterator3 tile_first1, + InputIterator3 tile_last1, + InputIterator4 tile_first2, + OutputIterator1 result, + OutputIterator2 counts) + { + m_count = iterator_range_size(tile_first1, tile_last1) - 1; + + *this << + "uint i = get_global_id(0);\n" << + "uint start1 = " << tile_first1[expr<uint_>("i")] << ";\n" << + "uint end1 = " << tile_first1[expr<uint_>("i+1")] << ";\n" << + "uint start2 = " << tile_first2[expr<uint_>("i")] << ";\n" << + "uint end2 = " << tile_first2[expr<uint_>("i+1")] << ";\n" << + "uint index = i*" << tile_size << ";\n" << + "uint count = 0;\n" << + "while(start1<end1 && start2<end2)\n" << + "{\n" << + " if(" << first1[expr<uint_>("start1")] << " == " << + first2[expr<uint_>("start2")] << ")\n" << + " {\n" << + result[expr<uint_>("index")] << + " = " << first1[expr<uint_>("start1")] << ";\n" << + " index++; count++;\n" << + " start1++; start2++;\n" << + " }\n" << + " else if(" << first1[expr<uint_>("start1")] << " < " << + first2[expr<uint_>("start2")] << ")\n" << + " start1++;\n" << + " else start2++;\n" << + "}\n" << + counts[expr<uint_>("i")] << " = count;\n"; + } + + event exec(command_queue &queue) + { + if(m_count == 0) { + return event(); + } + + return exec_1d(queue, 0, m_count); + } + +private: + size_t m_count; +}; + +} //end detail namespace + +/// +/// \brief Set intersection algorithm +/// +/// Finds the intersection of the sorted range [first1, last1) with the sorted +/// range [first2, last2) and stores it in range starting at result +/// \return Iterator pointing to end of intersection +/// +/// \param first1 Iterator pointing to start of first set +/// \param last1 Iterator pointing to end of first set +/// \param first2 Iterator pointing to start of second set +/// \param last2 Iterator pointing to end of second set +/// \param result Iterator pointing to start of range in which the intersection +/// will be stored +/// \param queue Queue 
on which to execute +/// +template<class InputIterator1, class InputIterator2, class OutputIterator> +inline OutputIterator set_intersection(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator1>::value_type value_type; + + int tile_size = 1024; + + int count1 = detail::iterator_range_size(first1, last1); + int count2 = detail::iterator_range_size(first2, last2); + + vector<uint_> tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); + vector<uint_> tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); + + // Tile the sets + detail::balanced_path_kernel tiling_kernel; + tiling_kernel.tile_size = tile_size; + tiling_kernel.set_range(first1, last1, first2, last2, + tile_a.begin()+1, tile_b.begin()+1); + fill_n(tile_a.begin(), 1, 0, queue); + fill_n(tile_b.begin(), 1, 0, queue); + tiling_kernel.exec(queue); + + fill_n(tile_a.end()-1, 1, count1, queue); + fill_n(tile_b.end()-1, 1, count2, queue); + + vector<value_type> temp_result(count1+count2, queue.get_context()); + vector<uint_> counts((count1+count2+tile_size-1)/tile_size + 1, queue.get_context()); + fill_n(counts.end()-1, 1, 0, queue); + + // Find individual intersections + detail::serial_set_intersection_kernel intersection_kernel; + intersection_kernel.tile_size = tile_size; + intersection_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(), + tile_b.begin(), temp_result.begin(), counts.begin()); + + intersection_kernel.exec(queue); + + exclusive_scan(counts.begin(), counts.end(), counts.begin(), queue); + + // Compact the results + detail::compact_kernel compact_kernel; + compact_kernel.tile_size = tile_size; + compact_kernel.set_range(temp_result.begin(), counts.begin(), counts.end(), result); + + compact_kernel.exec(queue); + + return result + (counts.end() - 1).read(queue); +} + +} //end 
compute namespace +} //end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_SET_INTERSECTION_HPP diff --git a/boost/compute/algorithm/set_symmetric_difference.hpp b/boost/compute/algorithm/set_symmetric_difference.hpp new file mode 100644 index 0000000000..6e60b38511 --- /dev/null +++ b/boost/compute/algorithm/set_symmetric_difference.hpp @@ -0,0 +1,194 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_SET_SYMMETRIC_DIFFERENCE_HPP +#define BOOST_COMPUTE_ALGORITHM_SET_SYMMETRIC_DIFFERENCE_HPP + +#include <iterator> + +#include <boost/compute/algorithm/detail/compact.hpp> +#include <boost/compute/algorithm/detail/balanced_path.hpp> +#include <boost/compute/algorithm/exclusive_scan.hpp> +#include <boost/compute/algorithm/fill_n.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/system.hpp> + +namespace boost { +namespace compute { +namespace detail { + +/// +/// \brief Serial set symmetric difference kernel class +/// +/// Subclass of meta_kernel to perform serial set symmetric +/// difference after tiling +/// +class serial_set_symmetric_difference_kernel : meta_kernel +{ +public: + unsigned int tile_size; + + serial_set_symmetric_difference_kernel() : meta_kernel("set_symmetric_difference") + { + tile_size = 4; + } + + template<class InputIterator1, class InputIterator2, + class InputIterator3, class InputIterator4, + class OutputIterator1, class OutputIterator2> + void set_range(InputIterator1 first1, 
+ InputIterator2 first2, + InputIterator3 tile_first1, + InputIterator3 tile_last1, + InputIterator4 tile_first2, + OutputIterator1 result, + OutputIterator2 counts) + { + m_count = iterator_range_size(tile_first1, tile_last1) - 1; + + *this << + "uint i = get_global_id(0);\n" << + "uint start1 = " << tile_first1[expr<uint_>("i")] << ";\n" << + "uint end1 = " << tile_first1[expr<uint_>("i+1")] << ";\n" << + "uint start2 = " << tile_first2[expr<uint_>("i")] << ";\n" << + "uint end2 = " << tile_first2[expr<uint_>("i+1")] << ";\n" << + "uint index = i*" << tile_size << ";\n" << + "uint count = 0;\n" << + "while(start1<end1 && start2<end2)\n" << + "{\n" << + " if(" << first1[expr<uint_>("start1")] << " == " << + first2[expr<uint_>("start2")] << ")\n" << + " {\n" << + " start1++; start2++;\n" << + " }\n" << + " else if(" << first1[expr<uint_>("start1")] << " < " << + first2[expr<uint_>("start2")] << ")\n" << + " {\n" << + result[expr<uint_>("index")] << + " = " << first1[expr<uint_>("start1")] << ";\n" << + " index++; count++;\n" << + " start1++;\n" << + " }\n" << + " else\n" << + " {\n" << + result[expr<uint_>("index")] << + " = " << first2[expr<uint_>("start2")] << ";\n" << + " index++; count++;\n" << + " start2++;\n" << + " }\n" << + "}\n" << + "while(start1<end1)\n" << + "{\n" << + result[expr<uint_>("index")] << + " = " << first1[expr<uint_>("start1")] << ";\n" << + " index++; count++;\n" << + " start1++;\n" << + "}\n" << + "while(start2<end2)\n" << + "{\n" << + result[expr<uint_>("index")] << + " = " << first2[expr<uint_>("start2")] << ";\n" << + " index++; count++;\n" << + " start2++;\n" << + "}\n" << + counts[expr<uint_>("i")] << " = count;\n"; + } + + event exec(command_queue &queue) + { + if(m_count == 0) { + return event(); + } + + return exec_1d(queue, 0, m_count); + } + +private: + size_t m_count; +}; + +} //end detail namespace + +/// +/// \brief Set symmetric difference algorithm +/// +/// Finds the symmetric difference of the sorted range [first2, last2) 
from +/// the sorted range [first1, last1) and stores it in range starting at result +/// \return Iterator pointing to end of symmetric difference +/// +/// \param first1 Iterator pointing to start of first set +/// \param last1 Iterator pointing to end of first set +/// \param first2 Iterator pointing to start of second set +/// \param last2 Iterator pointing to end of second set +/// \param result Iterator pointing to start of range in which the symmetric +/// difference will be stored +/// \param queue Queue on which to execute +/// +template<class InputIterator1, class InputIterator2, class OutputIterator> +inline OutputIterator set_symmetric_difference(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator1>::value_type value_type; + + int tile_size = 1024; + + int count1 = detail::iterator_range_size(first1, last1); + int count2 = detail::iterator_range_size(first2, last2); + + vector<uint_> tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); + vector<uint_> tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); + + // Tile the sets + detail::balanced_path_kernel tiling_kernel; + tiling_kernel.tile_size = tile_size; + tiling_kernel.set_range(first1, last1, first2, last2, + tile_a.begin()+1, tile_b.begin()+1); + fill_n(tile_a.begin(), 1, 0, queue); + fill_n(tile_b.begin(), 1, 0, queue); + tiling_kernel.exec(queue); + + fill_n(tile_a.end()-1, 1, count1, queue); + fill_n(tile_b.end()-1, 1, count2, queue); + + vector<value_type> temp_result(count1+count2, queue.get_context()); + vector<uint_> counts((count1+count2+tile_size-1)/tile_size + 1, queue.get_context()); + fill_n(counts.end()-1, 1, 0, queue); + + // Find individual symmetric differences + detail::serial_set_symmetric_difference_kernel symmetric_difference_kernel; + symmetric_difference_kernel.tile_size 
= tile_size; + symmetric_difference_kernel.set_range(first1, first2, tile_a.begin(), + tile_a.end(), tile_b.begin(), + temp_result.begin(), counts.begin()); + + symmetric_difference_kernel.exec(queue); + + exclusive_scan(counts.begin(), counts.end(), counts.begin(), queue); + + // Compact the results + detail::compact_kernel compact_kernel; + compact_kernel.tile_size = tile_size; + compact_kernel.set_range(temp_result.begin(), counts.begin(), counts.end(), result); + + compact_kernel.exec(queue); + + return result + (counts.end() - 1).read(queue); +} + +} //end compute namespace +} //end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_SET_SYMMETRIC_DIFFERENCE_HPP diff --git a/boost/compute/algorithm/set_union.hpp b/boost/compute/algorithm/set_union.hpp new file mode 100644 index 0000000000..c61f7b29b3 --- /dev/null +++ b/boost/compute/algorithm/set_union.hpp @@ -0,0 +1,195 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_SET_UNION_HPP +#define BOOST_COMPUTE_ALGORITHM_SET_UNION_HPP + +#include <iterator> + +#include <boost/compute/algorithm/detail/balanced_path.hpp> +#include <boost/compute/algorithm/detail/compact.hpp> +#include <boost/compute/algorithm/exclusive_scan.hpp> +#include <boost/compute/algorithm/fill_n.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/system.hpp> + +namespace boost { +namespace compute { +namespace detail { + +/// +/// \brief Serial set union kernel class +/// +/// Subclass of meta_kernel to perform serial set union after tiling +/// +class serial_set_union_kernel : meta_kernel +{ +public: + unsigned int tile_size; + + serial_set_union_kernel() : meta_kernel("set_union") + { + tile_size = 4; + } + + template<class InputIterator1, class InputIterator2, + class InputIterator3, class InputIterator4, + class OutputIterator1, class OutputIterator2> + void set_range(InputIterator1 first1, + InputIterator2 first2, + InputIterator3 tile_first1, + InputIterator3 tile_last1, + InputIterator4 tile_first2, + OutputIterator1 result, + OutputIterator2 counts) + { + m_count = iterator_range_size(tile_first1, tile_last1) - 1; + + *this << + "uint i = get_global_id(0);\n" << + "uint start1 = " << tile_first1[expr<uint_>("i")] << ";\n" << + "uint end1 = " << tile_first1[expr<uint_>("i+1")] << ";\n" << + "uint start2 = " << tile_first2[expr<uint_>("i")] << ";\n" << + "uint end2 = " << tile_first2[expr<uint_>("i+1")] << ";\n" << + "uint index = i*" << tile_size << ";\n" << + "uint count = 0;\n" << + "while(start1<end1 && start2<end2)\n" << + "{\n" << + " if(" << first1[expr<uint_>("start1")] << " == " << + first2[expr<uint_>("start2")] << ")\n" << + " {\n" << + result[expr<uint_>("index")] << + " = " << 
first1[expr<uint_>("start1")] << ";\n" << + " index++; count++;\n" << + " start1++; start2++;\n" << + " }\n" << + " else if(" << first1[expr<uint_>("start1")] << " < " << + first2[expr<uint_>("start2")] << ")\n" << + " {\n" << + result[expr<uint_>("index")] << + " = " << first1[expr<uint_>("start1")] << ";\n" << + " index++; count++;\n" << + " start1++;\n" << + " }\n" << + " else\n" << + " {\n" << + result[expr<uint_>("index")] << + " = " << first2[expr<uint_>("start2")] << ";\n" << + " index++; count++;\n" << + " start2++;\n" << + " }\n" << + "}\n" << + "while(start1<end1)\n" << + "{\n" << + result[expr<uint_>("index")] << + " = " << first1[expr<uint_>("start1")] << ";\n" << + " index++; count++;\n" << + " start1++;\n" << + "}\n" << + "while(start2<end2)\n" << + "{\n" << + result[expr<uint_>("index")] << + " = " << first2[expr<uint_>("start2")] << ";\n" << + " index++; count++;\n" << + " start2++;\n" << + "}\n" << + counts[expr<uint_>("i")] << " = count;\n"; + } + + event exec(command_queue &queue) + { + if(m_count == 0) { + return event(); + } + + return exec_1d(queue, 0, m_count); + } + +private: + size_t m_count; +}; + +} //end detail namespace + +/// +/// \brief Set union algorithm +/// +/// Finds the union of the sorted range [first1, last1) with the sorted +/// range [first2, last2) and stores it in range starting at result +/// \return Iterator pointing to end of union +/// +/// \param first1 Iterator pointing to start of first set +/// \param last1 Iterator pointing to end of first set +/// \param first2 Iterator pointing to start of second set +/// \param last2 Iterator pointing to end of second set +/// \param result Iterator pointing to start of range in which the union +/// will be stored +/// \param queue Queue on which to execute +/// +template<class InputIterator1, class InputIterator2, class OutputIterator> +inline OutputIterator set_union(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + 
OutputIterator result, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator1>::value_type value_type; + + int tile_size = 1024; + + int count1 = detail::iterator_range_size(first1, last1); + int count2 = detail::iterator_range_size(first2, last2); + + vector<uint_> tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); + vector<uint_> tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); + + // Tile the sets + detail::balanced_path_kernel tiling_kernel; + tiling_kernel.tile_size = tile_size; + tiling_kernel.set_range(first1, last1, first2, last2, + tile_a.begin()+1, tile_b.begin()+1); + fill_n(tile_a.begin(), 1, 0, queue); + fill_n(tile_b.begin(), 1, 0, queue); + tiling_kernel.exec(queue); + + fill_n(tile_a.end()-1, 1, count1, queue); + fill_n(tile_b.end()-1, 1, count2, queue); + + vector<value_type> temp_result(count1+count2, queue.get_context()); + vector<uint_> counts((count1+count2+tile_size-1)/tile_size + 1, queue.get_context()); + fill_n(counts.end()-1, 1, 0, queue); + + // Find individual unions + detail::serial_set_union_kernel union_kernel; + union_kernel.tile_size = tile_size; + union_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(), + tile_b.begin(), temp_result.begin(), counts.begin()); + + union_kernel.exec(queue); + + exclusive_scan(counts.begin(), counts.end(), counts.begin(), queue); + + // Compact the results + detail::compact_kernel compact_kernel; + compact_kernel.tile_size = tile_size; + compact_kernel.set_range(temp_result.begin(), counts.begin(), counts.end(), result); + + compact_kernel.exec(queue); + + return result + (counts.end() - 1).read(queue); +} + +} //end compute namespace +} //end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_SET_UNION_HPP diff --git a/boost/compute/algorithm/sort.hpp b/boost/compute/algorithm/sort.hpp new file mode 100644 index 0000000000..b2730b3e2b --- /dev/null +++ b/boost/compute/algorithm/sort.hpp @@ -0,0 
+1,194 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_SORT_HPP +#define BOOST_COMPUTE_ALGORITHM_SORT_HPP + +#include <iterator> + +#include <boost/utility/enable_if.hpp> + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/detail/merge_sort_on_cpu.hpp> +#include <boost/compute/algorithm/detail/radix_sort.hpp> +#include <boost/compute/algorithm/detail/insertion_sort.hpp> +#include <boost/compute/algorithm/reverse.hpp> +#include <boost/compute/container/mapped_view.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/iterator/buffer_iterator.hpp> +#include <boost/compute/type_traits/is_device_iterator.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class T> +inline void dispatch_gpu_sort(buffer_iterator<T> first, + buffer_iterator<T> last, + less<T>, + command_queue &queue, + typename boost::enable_if_c< + is_radix_sortable<T>::value + >::type* = 0) +{ + size_t count = detail::iterator_range_size(first, last); + + if(count < 2){ + // nothing to do + return; + } + else if(count <= 32){ + ::boost::compute::detail::serial_insertion_sort(first, last, queue); + } + else { + ::boost::compute::detail::radix_sort(first, last, queue); + } +} + +template<class T> +inline void dispatch_gpu_sort(buffer_iterator<T> first, + buffer_iterator<T> last, + greater<T> compare, + command_queue &queue, + typename boost::enable_if_c< + is_radix_sortable<T>::value + >::type* = 0) +{ + size_t count = 
detail::iterator_range_size(first, last); + + if(count < 2){ + // nothing to do + return; + } + else if(count <= 32){ + ::boost::compute::detail::serial_insertion_sort( + first, last, compare, queue + ); + } + else { + // radix sort in ascending order + ::boost::compute::detail::radix_sort(first, last, queue); + + // reverse range to descending order + ::boost::compute::reverse(first, last, queue); + } +} + +template<class Iterator, class Compare> +inline void dispatch_gpu_sort(Iterator first, + Iterator last, + Compare compare, + command_queue &queue) +{ + ::boost::compute::detail::serial_insertion_sort( + first, last, compare, queue + ); +} + +// sort() for device iterators +template<class Iterator, class Compare> +inline void dispatch_sort(Iterator first, + Iterator last, + Compare compare, + command_queue &queue, + typename boost::enable_if< + is_device_iterator<Iterator> + >::type* = 0) +{ + if(queue.get_device().type() & device::gpu) { + dispatch_gpu_sort(first, last, compare, queue); + return; + } + ::boost::compute::detail::merge_sort_on_cpu(first, last, compare, queue); +} + +// sort() for host iterators +template<class Iterator, class Compare> +inline void dispatch_sort(Iterator first, + Iterator last, + Compare compare, + command_queue &queue, + typename boost::disable_if< + is_device_iterator<Iterator> + >::type* = 0) +{ + typedef typename std::iterator_traits<Iterator>::value_type T; + + size_t size = static_cast<size_t>(std::distance(first, last)); + + // create mapped buffer + mapped_view<T> view( + boost::addressof(*first), size, queue.get_context() + ); + + // sort mapped buffer + dispatch_sort(view.begin(), view.end(), compare, queue); + + // return results to host + view.map(queue); +} + +} // end detail namespace + +/// Sorts the values in the range [\p first, \p last) according to +/// \p compare. 
+/// +/// \param first first element in the range to sort +/// \param last last element in the range to sort +/// \param compare comparison function (by default \c less) +/// \param queue command queue to perform the operation +/// +/// For example, to sort a vector on the device: +/// \code +/// // create vector on the device with data +/// float data[] = { 2.f, 4.f, 1.f, 3.f }; +/// boost::compute::vector<float> vec(data, data + 4, queue); +/// +/// // sort the vector on the device +/// boost::compute::sort(vec.begin(), vec.end(), queue); +/// \endcode +/// +/// The sort() algorithm can also be directly used with host iterators. This +/// example will automatically transfer the data to the device, sort it, and +/// then transfer the data back to the host: +/// \code +/// std::vector<int> data = { 9, 3, 2, 5, 1, 4, 6, 7 }; +/// +/// boost::compute::sort(data.begin(), data.end(), queue); +/// \endcode +/// +/// \see is_sorted() +template<class Iterator, class Compare> +inline void sort(Iterator first, + Iterator last, + Compare compare, + command_queue &queue = system::default_queue()) +{ + ::boost::compute::detail::dispatch_sort(first, last, compare, queue); +} + +/// \overload +template<class Iterator> +inline void sort(Iterator first, + Iterator last, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<Iterator>::value_type value_type; + + ::boost::compute::sort( + first, last, ::boost::compute::less<value_type>(), queue + ); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_SORT_HPP diff --git a/boost/compute/algorithm/sort_by_key.hpp b/boost/compute/algorithm/sort_by_key.hpp new file mode 100644 index 0000000000..0e3dba81eb --- /dev/null +++ b/boost/compute/algorithm/sort_by_key.hpp @@ -0,0 +1,156 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost 
Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_SORT_BY_KEY_HPP +#define BOOST_COMPUTE_ALGORITHM_SORT_BY_KEY_HPP + +#include <iterator> + +#include <boost/utility/enable_if.hpp> + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/detail/merge_sort_on_cpu.hpp> +#include <boost/compute/algorithm/detail/insertion_sort.hpp> +#include <boost/compute/algorithm/detail/radix_sort.hpp> +#include <boost/compute/algorithm/reverse.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> + +namespace boost { +namespace compute { + +namespace detail { + +template<class KeyIterator, class ValueIterator> +inline void +dispatch_gpu_sort_by_key(KeyIterator keys_first, + KeyIterator keys_last, + ValueIterator values_first, + less<typename std::iterator_traits<KeyIterator>::value_type> compare, + command_queue &queue, + typename boost::enable_if_c< + is_radix_sortable< + typename std::iterator_traits<KeyIterator>::value_type + >::value + >::type* = 0) +{ + size_t count = detail::iterator_range_size(keys_first, keys_last); + + if(count < 32){ + detail::serial_insertion_sort_by_key( + keys_first, keys_last, values_first, compare, queue + ); + } + else { + detail::radix_sort_by_key( + keys_first, keys_last, values_first, queue + ); + } +} + +template<class KeyIterator, class ValueIterator> +inline void +dispatch_gpu_sort_by_key(KeyIterator keys_first, + KeyIterator keys_last, + ValueIterator values_first, + greater<typename std::iterator_traits<KeyIterator>::value_type> compare, + command_queue &queue, + typename boost::enable_if_c< + is_radix_sortable< + typename std::iterator_traits<KeyIterator>::value_type + >::value + >::type* = 0) +{ + size_t count = 
detail::iterator_range_size(keys_first, keys_last); + + if(count < 32){ + detail::serial_insertion_sort_by_key( + keys_first, keys_last, values_first, compare, queue + ); + } + else { + // radix sorts in ascending order + detail::radix_sort_by_key( + keys_first, keys_last, values_first, queue + ); + + // Reverse keys, values for descending order + ::boost::compute::reverse(keys_first, keys_last, queue); + ::boost::compute::reverse(values_first, values_first + count, queue); + } +} + +template<class KeyIterator, class ValueIterator, class Compare> +inline void dispatch_gpu_sort_by_key(KeyIterator keys_first, + KeyIterator keys_last, + ValueIterator values_first, + Compare compare, + command_queue &queue) +{ + detail::serial_insertion_sort_by_key( + keys_first, keys_last, values_first, compare, queue + ); +} + +template<class KeyIterator, class ValueIterator, class Compare> +inline void dispatch_sort_by_key(KeyIterator keys_first, + KeyIterator keys_last, + ValueIterator values_first, + Compare compare, + command_queue &queue) +{ + if(queue.get_device().type() & device::gpu) { + dispatch_gpu_sort_by_key(keys_first, keys_last, values_first, compare, queue); + return; + } + ::boost::compute::detail::merge_sort_by_key_on_cpu( + keys_first, keys_last, values_first, compare, queue + ); +} + +} // end detail namespace + +/// Performs a key-value sort using the keys in the range [\p keys_first, +/// \p keys_last) on the values in the range [\p values_first, +/// \p values_first \c + (\p keys_last \c - \p keys_first)) using \p compare. +/// +/// If no compare function is specified, \c less is used. 
+/// +/// \see sort() +template<class KeyIterator, class ValueIterator, class Compare> +inline void sort_by_key(KeyIterator keys_first, + KeyIterator keys_last, + ValueIterator values_first, + Compare compare, + command_queue &queue = system::default_queue()) +{ + ::boost::compute::detail::dispatch_sort_by_key( + keys_first, keys_last, values_first, compare, queue + ); +} + +/// \overload +template<class KeyIterator, class ValueIterator> +inline void sort_by_key(KeyIterator keys_first, + KeyIterator keys_last, + ValueIterator values_first, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<KeyIterator>::value_type key_type; + + ::boost::compute::sort_by_key( + keys_first, keys_last, values_first, less<key_type>(), queue + ); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_SORT_BY_KEY_HPP diff --git a/boost/compute/algorithm/stable_partition.hpp b/boost/compute/algorithm/stable_partition.hpp new file mode 100644 index 0000000000..283b068283 --- /dev/null +++ b/boost/compute/algorithm/stable_partition.hpp @@ -0,0 +1,72 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_STABLE_PARTITION_HPP +#define BOOST_COMPUTE_ALGORITHM_STABLE_PARTITION_HPP + +#include <iterator> + +#include <boost/compute/system.hpp> +#include <boost/compute/context.hpp> +#include <boost/compute/functional.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/copy_if.hpp> +#include <boost/compute/container/vector.hpp> + +namespace boost { +namespace compute { + +/// +/// \brief Partitioning algorithm +/// +/// Partitions the elements in the range [\p first, \p last) according to +/// \p predicate. The order of the elements is preserved. +/// \return Iterator pointing to end of true values +/// +/// \param first Iterator pointing to start of range +/// \param last Iterator pointing to end of range +/// \param predicate Unary predicate to be applied on each element +/// \param queue Queue on which to execute +/// +/// \see is_partitioned() and partition() +/// +template<class Iterator, class UnaryPredicate> +inline Iterator stable_partition(Iterator first, + Iterator last, + UnaryPredicate predicate, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<Iterator>::value_type value_type; + + // make temporary copy of the input + ::boost::compute::vector<value_type> tmp(first, last, queue); + + // copy true values back to the front of the original range + Iterator last_true = + ::boost::compute::copy_if(tmp.begin(), + tmp.end(), + first, + predicate, + queue); + + // copy false values directly behind the true values; the iterator + // returned by this call is not needed + ::boost::compute::copy_if(tmp.begin(), + tmp.end(), + last_true, + not1(predicate), + queue); + + // return iterator pointing to the end of the true values + return last_true; +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_STABLE_PARTITION_HPP diff --git a/boost/compute/algorithm/stable_sort.hpp b/boost/compute/algorithm/stable_sort.hpp new file mode 100644 index 0000000000..cd82a0a606 --- 
/dev/null +++ b/boost/compute/algorithm/stable_sort.hpp @@ -0,0 +1,99 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_STABLE_SORT_HPP +#define BOOST_COMPUTE_ALGORITHM_STABLE_SORT_HPP + +#include <iterator> + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/detail/merge_sort_on_cpu.hpp> +#include <boost/compute/algorithm/detail/radix_sort.hpp> +#include <boost/compute/algorithm/detail/insertion_sort.hpp> +#include <boost/compute/algorithm/reverse.hpp> +#include <boost/compute/functional/operator.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class Iterator, class Compare> +inline void dispatch_gpu_stable_sort(Iterator first, + Iterator last, + Compare compare, + command_queue &queue) +{ + ::boost::compute::detail::serial_insertion_sort( + first, last, compare, queue + ); +} + +template<class T> +inline typename boost::enable_if_c<is_radix_sortable<T>::value>::type +dispatch_gpu_stable_sort(buffer_iterator<T> first, + buffer_iterator<T> last, + less<T>, + command_queue &queue) +{ + ::boost::compute::detail::radix_sort(first, last, queue); +} + +template<class T> +inline typename boost::enable_if_c<is_radix_sortable<T>::value>::type +dispatch_gpu_stable_sort(buffer_iterator<T> first, + buffer_iterator<T> last, + greater<T>, + command_queue &queue) +{ + // radix sort in ascending order + ::boost::compute::detail::radix_sort(first, last, queue); + + // reverse range to descending order + ::boost::compute::reverse(first, last, queue); +} + +} 
// end detail namespace + +/// Sorts the values in the range [\p first, \p last) according to +/// \p compare. The relative order of identical values is preserved. +/// +/// \param first first element in the range to sort +/// \param last last element in the range to sort +/// \param compare comparison function (by default \c less) +/// \param queue command queue to perform the operation +/// +/// \see sort(), is_sorted() +template<class Iterator, class Compare> +inline void stable_sort(Iterator first, + Iterator last, + Compare compare, + command_queue &queue = system::default_queue()) +{ + if(queue.get_device().type() & device::gpu) { + ::boost::compute::detail::dispatch_gpu_stable_sort( + first, last, compare, queue + ); + + // the range is sorted now; do not fall through and sort it a + // second time with the CPU merge sort below + return; + } + ::boost::compute::detail::merge_sort_on_cpu(first, last, compare, queue); +} + +/// \overload +template<class Iterator> +inline void stable_sort(Iterator first, + Iterator last, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<Iterator>::value_type value_type; + + ::boost::compute::less<value_type> less; + + ::boost::compute::stable_sort(first, last, less, queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_STABLE_SORT_HPP diff --git a/boost/compute/algorithm/stable_sort_by_key.hpp b/boost/compute/algorithm/stable_sort_by_key.hpp new file mode 100644 index 0000000000..8a51372ede --- /dev/null +++ b/boost/compute/algorithm/stable_sort_by_key.hpp @@ -0,0 +1,61 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2016 Jakub Szuppe <j.szuppe@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_STABLE_SORT_BY_KEY_HPP +#define BOOST_COMPUTE_ALGORITHM_STABLE_SORT_BY_KEY_HPP + +#include <iterator> + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/sort_by_key.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> + +namespace boost { +namespace compute { + +/// Performs a key-value stable sort using the keys in the range [\p keys_first, +/// \p keys_last) on the values in the range [\p values_first, +/// \p values_first \c + (\p keys_last \c - \p keys_first)) using \p compare. +/// +/// If no compare function is specified, \c less is used. +/// +/// \param keys_first first key in the range to sort +/// \param keys_last end of the key range +/// \param values_first first value; the value range has as many elements as the key range +/// \param compare comparison function applied to the keys +/// \param queue command queue to perform the operation +/// +/// \see sort_by_key(), stable_sort() +template<class KeyIterator, class ValueIterator, class Compare> +inline void stable_sort_by_key(KeyIterator keys_first, + KeyIterator keys_last, + ValueIterator values_first, + Compare compare, + command_queue &queue = system::default_queue()) +{ + // forwards to sort_by_key() and relies on that implementation being stable + // NOTE(review): for radix-sortable keys compared with greater<> on a GPU, + // sort_by_key() radix-sorts ascending and then reverses keys and values, + // which also reverses the relative order of equal keys -- confirm that + // path really is stable + ::boost::compute::sort_by_key( + keys_first, keys_last, values_first, compare, queue + ); +} + +/// \overload +template<class KeyIterator, class ValueIterator> +inline void stable_sort_by_key(KeyIterator keys_first, + KeyIterator keys_last, + ValueIterator values_first, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<KeyIterator>::value_type key_type; + + ::boost::compute::stable_sort_by_key( + keys_first, keys_last, values_first, less<key_type>(), queue + ); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_STABLE_SORT_BY_KEY_HPP diff --git a/boost/compute/algorithm/swap_ranges.hpp b/boost/compute/algorithm/swap_ranges.hpp new file mode 100644 index 0000000000..6ff3e14f6a --- /dev/null +++ b/boost/compute/algorithm/swap_ranges.hpp @@ -0,0 +1,44 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 
Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_SWAP_RANGES_HPP +#define BOOST_COMPUTE_ALGORITHM_SWAP_RANGES_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/container/vector.hpp> + +namespace boost { +namespace compute { + +/// Swaps the elements in the range [\p first1, \p last1) with the +/// elements in the range beginning at \p first2. +template<class Iterator1, class Iterator2> +inline Iterator2 swap_ranges(Iterator1 first1, + Iterator1 last1, + Iterator2 first2, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<Iterator1>::value_type value_type; + + Iterator2 last2 = first2 + std::distance(first1, last1); + + ::boost::compute::vector<value_type> tmp(first1, last1, queue); + ::boost::compute::copy(first2, last2, first1, queue); + ::boost::compute::copy(tmp.begin(), tmp.end(), first2, queue); + + return last2; +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_SWAP_RANGES_HPP diff --git a/boost/compute/algorithm/transform.hpp b/boost/compute/algorithm/transform.hpp new file mode 100644 index 0000000000..022a4988bd --- /dev/null +++ b/boost/compute/algorithm/transform.hpp @@ -0,0 +1,76 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more 
information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_TRANSFORM_HPP +#define BOOST_COMPUTE_ALGORITHM_TRANSFORM_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/iterator/transform_iterator.hpp> +#include <boost/compute/iterator/zip_iterator.hpp> +#include <boost/compute/functional/detail/unpack.hpp> + +namespace boost { +namespace compute { + +/// Transforms the elements in the range [\p first, \p last) using +/// \p transform and stores the results in the range beginning at +/// \p result. +/// +/// For example, to calculate the absolute value for each element in a vector: +/// +/// \snippet test/test_transform.cpp transform_abs +/// +/// \see copy() +template<class InputIterator, class OutputIterator, class UnaryOperator> +inline OutputIterator transform(InputIterator first, + InputIterator last, + OutputIterator result, + UnaryOperator op, + command_queue &queue = system::default_queue()) +{ + return copy( + ::boost::compute::make_transform_iterator(first, op), + ::boost::compute::make_transform_iterator(last, op), + result, + queue + ); +} + +/// \overload +template<class InputIterator1, + class InputIterator2, + class OutputIterator, + class BinaryOperator> +inline OutputIterator transform(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + BinaryOperator op, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator1>::difference_type difference_type; + + difference_type n = std::distance(first1, last1); + + return transform( + make_zip_iterator(boost::make_tuple(first1, first2)), + make_zip_iterator(boost::make_tuple(last1, first2 + n)), + result, + detail::unpack(op), + queue + ); +} + +} // end compute namespace +} // end boost namespace + +#endif // 
BOOST_COMPUTE_ALGORITHM_TRANSFORM_HPP diff --git a/boost/compute/algorithm/transform_if.hpp b/boost/compute/algorithm/transform_if.hpp new file mode 100644 index 0000000000..0eb0fd434e --- /dev/null +++ b/boost/compute/algorithm/transform_if.hpp @@ -0,0 +1,117 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_TRANSFORM_IF_HPP +#define BOOST_COMPUTE_ALGORITHM_TRANSFORM_IF_HPP + +#include <boost/compute/cl.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/count.hpp> +#include <boost/compute/algorithm/count_if.hpp> +#include <boost/compute/algorithm/exclusive_scan.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/iterator/discard_iterator.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class InputIterator, class OutputIterator, class UnaryFunction, class Predicate> +inline OutputIterator transform_if_impl(InputIterator first, + InputIterator last, + OutputIterator result, + UnaryFunction function, + Predicate predicate, + bool copyIndex, + command_queue &queue) +{ + typedef typename std::iterator_traits<OutputIterator>::difference_type difference_type; + + size_t count = detail::iterator_range_size(first, last); + if(count == 0){ + return result; + } + + const context &context = queue.get_context(); + + // storage for destination indices + ::boost::compute::vector<cl_uint> indices(count, context); + + // 
write counts + ::boost::compute::detail::meta_kernel k1("transform_if_write_counts"); + k1 << indices.begin()[k1.get_global_id(0)] << " = " + << predicate(first[k1.get_global_id(0)]) << " ? 1 : 0;\n"; + k1.exec_1d(queue, 0, count); + + // count number of elements to be copied + size_t copied_element_count = + ::boost::compute::count(indices.begin(), indices.end(), 1, queue); + + // scan indices + ::boost::compute::exclusive_scan( + indices.begin(), indices.end(), indices.begin(), queue + ); + + // copy values + ::boost::compute::detail::meta_kernel k2("transform_if_do_copy"); + k2 << "if(" << predicate(first[k2.get_global_id(0)]) << ")" << + " " << result[indices.begin()[k2.get_global_id(0)]] << "="; + + if(copyIndex){ + k2 << k2.get_global_id(0) << ";\n"; + } + else { + k2 << function(first[k2.get_global_id(0)]) << ";\n"; + } + + k2.exec_1d(queue, 0, count); + + return result + static_cast<difference_type>(copied_element_count); +} + +template<class InputIterator, class UnaryFunction, class Predicate> +inline discard_iterator transform_if_impl(InputIterator first, + InputIterator last, + discard_iterator result, + UnaryFunction function, + Predicate predicate, + bool copyIndex, + command_queue &queue) +{ + (void) function; + (void) copyIndex; + + return result + count_if(first, last, predicate, queue); +} + +} // end detail namespace + +/// Applies \p function to each element in the range [\p first, \p last) for +/// which \p predicate returns \c true and stores the results in the range +/// beginning at \p result. 
+template<class InputIterator, class OutputIterator, class UnaryFunction, class Predicate> +inline OutputIterator transform_if(InputIterator first, + InputIterator last, + OutputIterator result, + UnaryFunction function, + Predicate predicate, + command_queue &queue = system::default_queue()) +{ + return detail::transform_if_impl( + first, last, result, function, predicate, false, queue + ); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_TRANSFORM_IF_HPP diff --git a/boost/compute/algorithm/transform_reduce.hpp b/boost/compute/algorithm/transform_reduce.hpp new file mode 100644 index 0000000000..fbeee5a691 --- /dev/null +++ b/boost/compute/algorithm/transform_reduce.hpp @@ -0,0 +1,89 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_TRANSFORM_REDUCE_HPP +#define BOOST_COMPUTE_ALGORITHM_TRANSFORM_REDUCE_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/algorithm/reduce.hpp> +#include <boost/compute/iterator/transform_iterator.hpp> +#include <boost/compute/iterator/zip_iterator.hpp> +#include <boost/compute/functional/detail/unpack.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> + +namespace boost { +namespace compute { + +/// Transforms each value in the range [\p first, \p last) with the unary +/// \p transform_function and then reduces each transformed value with +/// \p reduce_function. 
+/// +/// For example, to calculate the sum of the absolute values of a vector +/// of integers: +/// +/// \snippet test/test_transform_reduce.cpp sum_abs_int +/// +/// \see reduce(), inner_product() +template<class InputIterator, + class OutputIterator, + class UnaryTransformFunction, + class BinaryReduceFunction> +inline void transform_reduce(InputIterator first, + InputIterator last, + OutputIterator result, + UnaryTransformFunction transform_function, + BinaryReduceFunction reduce_function, + command_queue &queue = system::default_queue()) +{ + ::boost::compute::reduce( + ::boost::compute::make_transform_iterator(first, transform_function), + ::boost::compute::make_transform_iterator(last, transform_function), + result, + reduce_function, + queue + ); +} + +/// \overload +template<class InputIterator1, + class InputIterator2, + class OutputIterator, + class BinaryTransformFunction, + class BinaryReduceFunction> +inline void transform_reduce(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + OutputIterator result, + BinaryTransformFunction transform_function, + BinaryReduceFunction reduce_function, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator1>::difference_type difference_type; + + difference_type n = std::distance(first1, last1); + + ::boost::compute::transform_reduce( + ::boost::compute::make_zip_iterator( + boost::make_tuple(first1, first2) + ), + ::boost::compute::make_zip_iterator( + boost::make_tuple(last1, first2 + n) + ), + result, + detail::unpack(transform_function), + reduce_function, + queue + ); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_TRANSFORM_REDUCE_HPP diff --git a/boost/compute/algorithm/unique.hpp b/boost/compute/algorithm/unique.hpp new file mode 100644 index 0000000000..faa36bad9d --- /dev/null +++ b/boost/compute/algorithm/unique.hpp @@ -0,0 +1,66 @@ 
+//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_UNIQUE_HPP +#define BOOST_COMPUTE_ALGORITHM_UNIQUE_HPP + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/unique_copy.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/functional/operator.hpp> + +namespace boost { +namespace compute { + +/// Removes all consecutive duplicate elements (determined by \p op) from the +/// range [first, last). If \p op is not provided, the equality operator is +/// used. +/// +/// \param first first element in the input range +/// \param last last element in the input range +/// \param op binary operator used to check for uniqueness +/// \param queue command queue to perform the operation +/// +/// \return \c InputIterator to the new logical end of the range +/// +/// \see unique_copy() +template<class InputIterator, class BinaryPredicate> +inline InputIterator unique(InputIterator first, + InputIterator last, + BinaryPredicate op, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator>::value_type value_type; + + vector<value_type> temp(first, last, queue); + + return ::boost::compute::unique_copy( + temp.begin(), temp.end(), first, op, queue + ); +} + +/// \overload +template<class InputIterator> +inline InputIterator unique(InputIterator first, + InputIterator last, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator>::value_type value_type; + + return 
::boost::compute::unique( + first, last, ::boost::compute::equal_to<value_type>(), queue + ); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_UNIQUE_HPP diff --git a/boost/compute/algorithm/unique_copy.hpp b/boost/compute/algorithm/unique_copy.hpp new file mode 100644 index 0000000000..2ce60a9359 --- /dev/null +++ b/boost/compute/algorithm/unique_copy.hpp @@ -0,0 +1,164 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_UNIQUE_COPY_HPP +#define BOOST_COMPUTE_ALGORITHM_UNIQUE_COPY_HPP + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/lambda.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/algorithm/copy_if.hpp> +#include <boost/compute/algorithm/transform.hpp> +#include <boost/compute/algorithm/gather.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/functional/operator.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class InputIterator, class OutputIterator, class BinaryPredicate> +inline OutputIterator serial_unique_copy(InputIterator first, + InputIterator last, + OutputIterator result, + BinaryPredicate op, + command_queue &queue) +{ + if(first == last){ + return result; + } + + typedef typename std::iterator_traits<InputIterator>::value_type value_type; + + const context &context = queue.get_context(); + + size_t count = detail::iterator_range_size(first, last); + + detail::meta_kernel 
k("serial_unique_copy"); + + vector<uint_> unique_count_vector(1, context); + + size_t size_arg = k.add_arg<const uint_>("size"); + size_t unique_count_arg = k.add_arg<uint_ *>(memory_object::global_memory, "unique_count"); + + k << k.decl<uint_>("index") << " = 0;\n" + << k.decl<value_type>("current") << " = " << first[k.var<uint_>("0")] << ";\n" + << result[k.var<uint_>("0")] << " = current;\n" + << "for(uint i = 1; i < size; i++){\n" + << " " << k.decl<value_type>("next") << " = " << first[k.var<uint_>("i")] << ";\n" + << " if(!" << op(k.var<value_type>("current"), k.var<value_type>("next")) << "){\n" + << " " << result[k.var<uint_>("++index")] << " = next;\n" + << " " << "current = next;\n" + << " }\n" + << "}\n" + << "*unique_count = index + 1;\n"; + + k.set_arg<const uint_>(size_arg, count); + k.set_arg(unique_count_arg, unique_count_vector.get_buffer()); + + k.exec_1d(queue, 0, 1, 1); + + uint_ unique_count; + copy_n(unique_count_vector.begin(), 1, &unique_count, queue); + + return result + unique_count; +} + +template<class InputIterator, class OutputIterator, class BinaryPredicate> +inline OutputIterator unique_copy(InputIterator first, + InputIterator last, + OutputIterator result, + BinaryPredicate op, + command_queue &queue) +{ + if(first == last){ + return result; + } + + const context &context = queue.get_context(); + size_t count = detail::iterator_range_size(first, last); + + // flags marking unique elements + vector<uint_> flags(count, context); + + // find each unique element and mark it with a one + transform( + first, last - 1, first + 1, flags.begin() + 1, not2(op), queue + ); + + // first element is always unique + fill_n(flags.begin(), 1, 1, queue); + + // storage for destination indices + vector<uint_> indices(count, context); + + // copy indices for each unique element + vector<uint_>::iterator last_index = detail::copy_index_if( + flags.begin(), flags.end(), indices.begin(), lambda::_1 == 1, queue + ); + + // copy unique values from input 
to output using the computed indices + gather(indices.begin(), last_index, first, result, queue); + + // return an iterator to the end of the unique output range + return result + std::distance(indices.begin(), last_index); +} + +} // end detail namespace + +/// Makes a copy of the range [first, last) and removes all consecutive +/// duplicate elements (determined by \p op) from the copy. If \p op is not +/// provided, the equality operator is used. +/// +/// \param first first element in the input range +/// \param last last element in the input range +/// \param result first element in the result range +/// \param op binary operator used to check for uniqueness +/// \param queue command queue to perform the operation +/// +/// \return \c OutputIterator to the end of the result range +/// +/// \see unique() +template<class InputIterator, class OutputIterator, class BinaryPredicate> +inline OutputIterator unique_copy(InputIterator first, + InputIterator last, + OutputIterator result, + BinaryPredicate op, + command_queue &queue = system::default_queue()) +{ + size_t count = detail::iterator_range_size(first, last); + if(count < 32){ + return detail::serial_unique_copy(first, last, result, op, queue); + } + else { + return detail::unique_copy(first, last, result, op, queue); + } +} + +/// \overload +template<class InputIterator, class OutputIterator> +inline OutputIterator unique_copy(InputIterator first, + InputIterator last, + OutputIterator result, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<InputIterator>::value_type value_type; + + return ::boost::compute::unique_copy( + first, last, result, ::boost::compute::equal_to<value_type>(), queue + ); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_UNIQUE_COPY_HPP diff --git a/boost/compute/algorithm/upper_bound.hpp b/boost/compute/algorithm/upper_bound.hpp new file mode 100644 index 0000000000..a5a82d301c --- /dev/null +++ 
b/boost/compute/algorithm/upper_bound.hpp @@ -0,0 +1,43 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALGORITHM_UPPER_BOUND_HPP +#define BOOST_COMPUTE_ALGORITHM_UPPER_BOUND_HPP + +#include <boost/compute/lambda.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/detail/binary_find.hpp> + +namespace boost { +namespace compute { + +/// Returns an iterator pointing to the first element in the sorted +/// range [\p first, \p last) that is not less than or equal to +/// \p value. +template<class InputIterator, class T> +inline InputIterator +upper_bound(InputIterator first, + InputIterator last, + const T &value, + command_queue &queue = system::default_queue()) +{ + using ::boost::compute::_1; + + InputIterator position = + detail::binary_find(first, last, _1 > value, queue); + + return position; +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALGORITHM_UPPER_BOUND_HPP diff --git a/boost/compute/allocator.hpp b/boost/compute/allocator.hpp new file mode 100644 index 0000000000..e3db491b42 --- /dev/null +++ b/boost/compute/allocator.hpp @@ -0,0 +1,21 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALLOCATOR_HPP +#define BOOST_COMPUTE_ALLOCATOR_HPP + +/// \file +/// +/// Meta-header to include all Boost.Compute allocator headers. + +#include <boost/compute/allocator/buffer_allocator.hpp> +#include <boost/compute/allocator/pinned_allocator.hpp> + +#endif // BOOST_COMPUTE_ALLOCATOR_HPP diff --git a/boost/compute/allocator/buffer_allocator.hpp b/boost/compute/allocator/buffer_allocator.hpp new file mode 100644 index 0000000000..2139a97896 --- /dev/null +++ b/boost/compute/allocator/buffer_allocator.hpp @@ -0,0 +1,118 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALLOCATOR_BUFFER_ALLOCATOR_HPP +#define BOOST_COMPUTE_ALLOCATOR_BUFFER_ALLOCATOR_HPP + +#include <boost/compute/buffer.hpp> +#include <boost/compute/config.hpp> +#include <boost/compute/context.hpp> +#include <boost/compute/detail/device_ptr.hpp> + +namespace boost { +namespace compute { + +/// \class buffer_allocator +/// \brief The buffer_allocator class allocates memory with \ref buffer objects +/// +/// \see buffer +template<class T> +class buffer_allocator +{ +public: + typedef T value_type; + typedef detail::device_ptr<T> pointer; + typedef const detail::device_ptr<T> const_pointer; + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + + explicit buffer_allocator(const context &context) + : m_context(context), + m_mem_flags(buffer::read_write) + { + } + + buffer_allocator(const buffer_allocator<T> &other) + : m_context(other.m_context), + m_mem_flags(other.m_mem_flags) + { + } + + buffer_allocator<T>& operator=(const buffer_allocator<T> &other) + { + if(this != &other){ + m_context = other.m_context; + m_mem_flags = other.m_mem_flags; + } + + return *this; + } + + #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES + buffer_allocator(buffer_allocator<T>&& other) BOOST_NOEXCEPT + : m_context(std::move(other.m_context)), + m_mem_flags(other.m_mem_flags) + { + } + + buffer_allocator<T>& operator=(buffer_allocator<T>&& other) BOOST_NOEXCEPT + { + m_context = std::move(other.m_context); + m_mem_flags = other.m_mem_flags; + + return *this; + } + #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES + + ~buffer_allocator() + { + } + + pointer allocate(size_type n) + { + buffer buf(m_context, n * sizeof(T), m_mem_flags); + clRetainMemObject(buf.get()); + return detail::device_ptr<T>(buf); + } + + void deallocate(pointer p, size_type n) + { + BOOST_ASSERT(p.get_buffer().get_context() == m_context); + + (void) n; + + clReleaseMemObject(p.get_buffer().get()); + 
} + + size_type max_size() const + { + return m_context.get_device().max_memory_alloc_size() / sizeof(T); + } + + context get_context() const + { + return m_context; + } + +protected: + void set_mem_flags(cl_mem_flags flags) + { + m_mem_flags = flags; + } + +private: + context m_context; + cl_mem_flags m_mem_flags; +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALLOCATOR_BUFFER_ALLOCATOR_HPP diff --git a/boost/compute/allocator/pinned_allocator.hpp b/boost/compute/allocator/pinned_allocator.hpp new file mode 100644 index 0000000000..9c85d93e79 --- /dev/null +++ b/boost/compute/allocator/pinned_allocator.hpp @@ -0,0 +1,53 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ALLOCATOR_PINNED_ALLOCATOR_HPP +#define BOOST_COMPUTE_ALLOCATOR_PINNED_ALLOCATOR_HPP + +#include <boost/compute/allocator/buffer_allocator.hpp> + +namespace boost { +namespace compute { + +template<class T> +class pinned_allocator : public buffer_allocator<T> +{ +public: + explicit pinned_allocator(const context &context) + : buffer_allocator<T>(context) + { + buffer_allocator<T>::set_mem_flags( + buffer::read_write | buffer::alloc_host_ptr + ); + } + + pinned_allocator(const pinned_allocator<T> &other) + : buffer_allocator<T>(other) + { + } + + pinned_allocator<T>& operator=(const pinned_allocator<T> &other) + { + if(this != &other){ + buffer_allocator<T>::operator=(other); + } + + return *this; + } + + ~pinned_allocator() + { + } +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ALLOCATOR_PINNED_ALLOCATOR_HPP diff --git a/boost/compute/async.hpp b/boost/compute/async.hpp new file mode 100644 index 0000000000..415dedda7d --- /dev/null +++ b/boost/compute/async.hpp @@ -0,0 +1,21 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ASYNC_HPP +#define BOOST_COMPUTE_ASYNC_HPP + +/// \file +/// +/// Meta-header to include all Boost.Compute async headers. 
+ +#include <boost/compute/async/future.hpp> +#include <boost/compute/async/wait_guard.hpp> + +#endif // BOOST_COMPUTE_ASYNC_HPP diff --git a/boost/compute/async/future.hpp b/boost/compute/async/future.hpp new file mode 100644 index 0000000000..f7f7780deb --- /dev/null +++ b/boost/compute/async/future.hpp @@ -0,0 +1,166 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ASYNC_FUTURE_HPP +#define BOOST_COMPUTE_ASYNC_FUTURE_HPP + +#include <boost/compute/event.hpp> + +namespace boost { +namespace compute { + +/// \class future +/// \brief Holds the result of an asynchronous computation. +/// +/// \see event, wait_list +template<class T> +class future +{ +public: + future() + : m_event(0) + { + } + + future(const T &result, const event &event) + : m_result(result), + m_event(event) + { + } + + future(const future<T> &other) + : m_result(other.m_result), + m_event(other.m_event) + { + } + + future& operator=(const future<T> &other) + { + if(this != &other){ + m_result = other.m_result; + m_event = other.m_event; + } + + return *this; + } + + ~future() + { + } + + /// Returns the result of the computation. This will block until + /// the result is ready. + T get() + { + wait(); + + return m_result; + } + + /// Returns \c true if the future is valid. + bool valid() const + { + return m_event != 0; + } + + /// Blocks until the computation is complete. + void wait() const + { + m_event.wait(); + } + + /// Returns the underlying event object. 
+ event get_event() const + { + return m_event; + } + +private: + T m_result; + event m_event; +}; + +/// \internal_ +template<> +class future<void> +{ +public: + future() + : m_event(0) + { + } + + template<class T> + future(const future<T> &other) + : m_event(other.get_event()) + { + } + + explicit future(const event &event) + : m_event(event) + { + } + + template<class T> + future<void> &operator=(const future<T> &other) + { + m_event = other.get_event(); + + return *this; + } + + future<void> &operator=(const future<void> &other) + { + if(this != &other){ + m_event = other.m_event; + } + + return *this; + } + + ~future() + { + } + + void get() + { + wait(); + } + + bool valid() const + { + return m_event != 0; + } + + void wait() const + { + m_event.wait(); + } + + event get_event() const + { + return m_event; + } + +private: + event m_event; +}; + +/// \internal_ +template<class Result> +inline future<Result> make_future(const Result &result, const event &event) +{ + return future<Result>(result, event); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ASYNC_FUTURE_HPP diff --git a/boost/compute/async/wait.hpp b/boost/compute/async/wait.hpp new file mode 100644 index 0000000000..dacf0feb0c --- /dev/null +++ b/boost/compute/async/wait.hpp @@ -0,0 +1,56 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ASYNC_WAIT_HPP +#define BOOST_COMPUTE_ASYNC_WAIT_HPP + +#include <boost/compute/config.hpp> +#include <boost/compute/utility/wait_list.hpp> + +namespace boost { +namespace compute { +namespace detail { + +#ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES +template<class Event> +inline void insert_events_variadic(wait_list &l, Event&& event) +{ + l.insert(std::forward<Event>(event)); +} + +template<class Event, class... Rest> +inline void insert_events_variadic(wait_list &l, Event&& event, Rest&&... rest) +{ + l.insert(std::forward<Event>(event)); + + insert_events_variadic(l, std::forward<Rest>(rest)...); +} +#endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES + +} // end detail namespace + +#ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES +/// Blocks until all events have completed. Events can either be \ref event +/// objects or \ref future "future<T>" objects. +/// +/// \see event, wait_list +template<class... Events> +inline void wait_for_all(Events&&... events) +{ + wait_list l; + detail::insert_events_variadic(l, std::forward<Events>(events)...); + l.wait(); +} +#endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ASYNC_WAIT_HPP diff --git a/boost/compute/async/wait_guard.hpp b/boost/compute/async/wait_guard.hpp new file mode 100644 index 0000000000..46018fa35a --- /dev/null +++ b/boost/compute/async/wait_guard.hpp @@ -0,0 +1,63 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ASYNC_WAIT_GUARD_HPP +#define BOOST_COMPUTE_ASYNC_WAIT_GUARD_HPP + +#include <boost/noncopyable.hpp> + +namespace boost { +namespace compute { + +/// \class wait_guard +/// \brief A guard object for synchronizing an operation on the device +/// +/// The wait_guard class stores a waitable object representing an operation +/// on a compute device (e.g. \ref event, \ref future "future<T>") and calls +/// its \c wait() method when the guard object goes out of scope. +/// +/// This is useful for ensuring that an OpenCL operation completes before +/// leaving the current scope and cleaning up any resources. +/// +/// For example: +/// \code +/// // enqueue a compute kernel for execution +/// event e = queue.enqueue_nd_range_kernel(...); +/// +/// // call e.wait() upon exiting the current scope +/// wait_guard<event> guard(e); +/// \endcode +/// +/// \ref wait_list, wait_for_all() +template<class Waitable> +class wait_guard : boost::noncopyable +{ +public: + /// Creates a new wait_guard object for \p waitable. + wait_guard(const Waitable &waitable) + : m_waitable(waitable) + { + } + + /// Destroys the wait_guard object. The default implementation will call + /// \c wait() on the stored waitable object. 
+ ~wait_guard() + { + m_waitable.wait(); + } + +private: + Waitable m_waitable; +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ASYNC_WAIT_GUARD_HPP diff --git a/boost/compute/buffer.hpp b/boost/compute/buffer.hpp new file mode 100644 index 0000000000..b5a48806d5 --- /dev/null +++ b/boost/compute/buffer.hpp @@ -0,0 +1,227 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_BUFFER_HPP +#define BOOST_COMPUTE_BUFFER_HPP + +#include <boost/compute/config.hpp> +#include <boost/compute/context.hpp> +#include <boost/compute/exception.hpp> +#include <boost/compute/memory_object.hpp> +#include <boost/compute/detail/get_object_info.hpp> + +namespace boost { +namespace compute { + +// forward declarations +class command_queue; + +/// \class buffer +/// \brief A memory buffer on a compute device. +/// +/// The buffer class represents a memory buffer on a compute device. +/// +/// Buffers are allocated within a compute context. For example, to allocate +/// a memory buffer for 32 float's: +/// +/// \snippet test/test_buffer.cpp constructor +/// +/// Once created, data can be copied to and from the buffer using the +/// \c enqueue_*_buffer() methods in the command_queue class. For example, to +/// copy a set of \c int values from the host to the device: +/// \code +/// int data[] = { 1, 2, 3, 4 }; +/// +/// queue.enqueue_write_buffer(buf, 0, 4 * sizeof(int), data); +/// \endcode +/// +/// Also see the copy() algorithm for a higher-level interface to copying data +/// between the host and the device. 
For a higher-level, dynamically-resizable, +/// type-safe container for data on a compute device, use the vector<T> class. +/// +/// Buffer objects have reference semantics. Creating a copy of a buffer +/// object simply creates another reference to the underlying OpenCL memory +/// object. To create an actual copy use the buffer::clone() method. +/// +/// \see context, command_queue +class buffer : public memory_object +{ +public: + /// Creates a null buffer object. + buffer() + : memory_object() + { + } + + /// Creates a buffer object for \p mem. If \p retain is \c true, the + /// reference count for \p mem will be incremented. + explicit buffer(cl_mem mem, bool retain = true) + : memory_object(mem, retain) + { + } + + /// Create a new memory buffer in of \p size with \p flags in + /// \p context. + /// + /// \see_opencl_ref{clCreateBuffer} + buffer(const context &context, + size_t size, + cl_mem_flags flags = read_write, + void *host_ptr = 0) + { + cl_int error = 0; + m_mem = clCreateBuffer(context, + flags, + (std::max)(size, size_t(1)), + host_ptr, + &error); + if(!m_mem){ + BOOST_THROW_EXCEPTION(opencl_error(error)); + } + } + + /// Creates a new buffer object as a copy of \p other. + buffer(const buffer &other) + : memory_object(other) + { + } + + /// Copies the buffer object from \p other to \c *this. + buffer& operator=(const buffer &other) + { + if(this != &other){ + memory_object::operator=(other); + } + + return *this; + } + + #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES + /// Move-constructs a new buffer object from \p other. + buffer(buffer&& other) BOOST_NOEXCEPT + : memory_object(std::move(other)) + { + } + + /// Move-assigns the buffer from \p other to \c *this. + buffer& operator=(buffer&& other) BOOST_NOEXCEPT + { + memory_object::operator=(std::move(other)); + + return *this; + } + #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES + + /// Destroys the buffer object. + ~buffer() + { + } + + /// Returns the size of the buffer in bytes. 
+ size_t size() const + { + return get_memory_size(); + } + + /// \internal_ + size_t max_size() const + { + return get_context().get_device().max_memory_alloc_size(); + } + + /// Returns information about the buffer. + /// + /// \see_opencl_ref{clGetMemObjectInfo} + template<class T> + T get_info(cl_mem_info info) const + { + return get_memory_info<T>(info); + } + + /// \overload + template<int Enum> + typename detail::get_object_info_type<buffer, Enum>::type + get_info() const; + + /// Creates a new buffer with a copy of the data in \c *this. Uses + /// \p queue to perform the copy. + buffer clone(command_queue &queue) const; + + #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + /// Creates a new buffer out of this buffer. + /// The new buffer is a sub region of this buffer. + /// \p flags The mem_flags which should be used to create the new buffer + /// \p origin The start index in this buffer + /// \p size The size of the new sub buffer + /// + /// \see_opencl_ref{clCreateSubBuffer} + /// + /// \opencl_version_warning{1,1} + buffer create_subbuffer(cl_mem_flags flags, size_t origin, + size_t size) + { + BOOST_ASSERT(origin + size <= this->size()); + BOOST_ASSERT(origin % (get_context(). + get_device(). 
+ get_info<CL_DEVICE_MEM_BASE_ADDR_ALIGN>() / 8) == 0); + cl_int error = 0; + + cl_buffer_region region = { origin, size }; + + cl_mem mem = clCreateSubBuffer(m_mem, + flags, + CL_BUFFER_CREATE_TYPE_REGION, + ®ion, + &error); + + if(!mem){ + BOOST_THROW_EXCEPTION(opencl_error(error)); + } + + return buffer(mem, false); + } + #endif // CL_VERSION_1_1 +}; + +/// \internal_ define get_info() specializations for buffer +BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(buffer, + ((cl_mem_object_type, CL_MEM_TYPE)) + ((cl_mem_flags, CL_MEM_FLAGS)) + ((size_t, CL_MEM_SIZE)) + ((void *, CL_MEM_HOST_PTR)) + ((cl_uint, CL_MEM_MAP_COUNT)) + ((cl_uint, CL_MEM_REFERENCE_COUNT)) + ((cl_context, CL_MEM_CONTEXT)) +) + +#ifdef CL_VERSION_1_1 +BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(buffer, + ((cl_mem, CL_MEM_ASSOCIATED_MEMOBJECT)) + ((size_t, CL_MEM_OFFSET)) +) +#endif // CL_VERSION_1_1 + +namespace detail { + +// set_kernel_arg specialization for buffer +template<> +struct set_kernel_arg<buffer> +{ + void operator()(kernel &kernel_, size_t index, const buffer &buffer_) + { + kernel_.set_arg(index, buffer_.get()); + } +}; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_BUFFER_HPP diff --git a/boost/compute/cl.hpp b/boost/compute/cl.hpp new file mode 100644 index 0000000000..c439d8dfdc --- /dev/null +++ b/boost/compute/cl.hpp @@ -0,0 +1,20 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_CL_HPP +#define BOOST_COMPUTE_CL_HPP + +#if defined(__APPLE__) +#include <OpenCL/cl.h> +#else +#include <CL/cl.h> +#endif + +#endif // BOOST_COMPUTE_CL_HPP diff --git a/boost/compute/cl_ext.hpp b/boost/compute/cl_ext.hpp new file mode 100644 index 0000000000..0b21a12fd7 --- /dev/null +++ b/boost/compute/cl_ext.hpp @@ -0,0 +1,20 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_CL_EXT_HPP +#define BOOST_COMPUTE_CL_EXT_HPP + +#if defined(__APPLE__) +#include <OpenCL/cl_ext.h> +#else +#include <CL/cl_ext.h> +#endif + +#endif // BOOST_COMPUTE_CL_EXT_HPP diff --git a/boost/compute/closure.hpp b/boost/compute/closure.hpp new file mode 100644 index 0000000000..6e3cbe702b --- /dev/null +++ b/boost/compute/closure.hpp @@ -0,0 +1,347 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_CLOSURE_HPP +#define BOOST_COMPUTE_CLOSURE_HPP + +#include <string> +#include <sstream> + +#include <boost/config.hpp> +#include <boost/fusion/adapted/boost_tuple.hpp> +#include <boost/fusion/algorithm/iteration/for_each.hpp> +#include <boost/mpl/for_each.hpp> +#include <boost/mpl/transform.hpp> +#include <boost/typeof/typeof.hpp> +#include <boost/static_assert.hpp> +#include <boost/algorithm/string.hpp> +#include <boost/tuple/tuple.hpp> +#include <boost/type_traits/function_traits.hpp> + +#include <boost/compute/cl.hpp> +#include <boost/compute/function.hpp> +#include <boost/compute/type_traits/type_name.hpp> +#include <boost/compute/type_traits/detail/capture_traits.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class ResultType, class ArgTuple, class CaptureTuple> +class invoked_closure +{ +public: + typedef ResultType result_type; + + BOOST_STATIC_CONSTANT( + size_t, arity = boost::tuples::length<ArgTuple>::value + ); + + invoked_closure(const std::string &name, + const std::string &source, + const std::map<std::string, std::string> &definitions, + const ArgTuple &args, + const CaptureTuple &capture) + : m_name(name), + m_source(source), + m_definitions(definitions), + m_args(args), + m_capture(capture) + { + } + + std::string name() const + { + return m_name; + } + + std::string source() const + { + return m_source; + } + + const std::map<std::string, std::string>& definitions() const + { + return m_definitions; + } + + const ArgTuple& args() const + { + return m_args; + } + + const CaptureTuple& capture() const + { + return m_capture; + } + +private: + std::string m_name; + std::string m_source; + std::map<std::string, std::string> m_definitions; + ArgTuple m_args; + CaptureTuple m_capture; +}; + +} // end detail namespace + +/// \internal_ +template<class Signature, class CaptureTuple> +class closure +{ +public: + typedef 
typename + boost::function_traits<Signature>::result_type result_type; + + BOOST_STATIC_CONSTANT( + size_t, arity = boost::function_traits<Signature>::arity + ); + + closure(const std::string &name, + const CaptureTuple &capture, + const std::string &source) + : m_name(name), + m_source(source), + m_capture(capture) + { + } + + ~closure() + { + } + + std::string name() const + { + return m_name; + } + + /// \internal_ + std::string source() const + { + return m_source; + } + + /// \internal_ + void define(std::string name, std::string value = std::string()) + { + m_definitions[name] = value; + } + + /// \internal_ + detail::invoked_closure<result_type, boost::tuple<>, CaptureTuple> + operator()() const + { + BOOST_STATIC_ASSERT_MSG( + arity == 0, + "Non-nullary closure function invoked with zero arguments" + ); + + return detail::invoked_closure<result_type, boost::tuple<>, CaptureTuple>( + m_name, m_source, m_definitions, boost::make_tuple(), m_capture + ); + } + + /// \internal_ + template<class Arg1> + detail::invoked_closure<result_type, boost::tuple<Arg1>, CaptureTuple> + operator()(const Arg1 &arg1) const + { + BOOST_STATIC_ASSERT_MSG( + arity == 1, + "Non-unary closure function invoked with one argument" + ); + + return detail::invoked_closure<result_type, boost::tuple<Arg1>, CaptureTuple>( + m_name, m_source, m_definitions, boost::make_tuple(arg1), m_capture + ); + } + + /// \internal_ + template<class Arg1, class Arg2> + detail::invoked_closure<result_type, boost::tuple<Arg1, Arg2>, CaptureTuple> + operator()(const Arg1 &arg1, const Arg2 &arg2) const + { + BOOST_STATIC_ASSERT_MSG( + arity == 2, + "Non-binary closure function invoked with two arguments" + ); + + return detail::invoked_closure<result_type, boost::tuple<Arg1, Arg2>, CaptureTuple>( + m_name, m_source, m_definitions, boost::make_tuple(arg1, arg2), m_capture + ); + } + + /// \internal_ + template<class Arg1, class Arg2, class Arg3> + detail::invoked_closure<result_type, boost::tuple<Arg1, Arg2, 
Arg3>, CaptureTuple> + operator()(const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) const + { + BOOST_STATIC_ASSERT_MSG( + arity == 3, + "Non-ternary closure function invoked with three arguments" + ); + + return detail::invoked_closure<result_type, boost::tuple<Arg1, Arg2, Arg3>, CaptureTuple>( + m_name, m_source, m_definitions, boost::make_tuple(arg1, arg2, arg3), m_capture + ); + } + +private: + std::string m_name; + std::string m_source; + std::map<std::string, std::string> m_definitions; + CaptureTuple m_capture; +}; + +namespace detail { + +struct closure_signature_argument_inserter +{ + closure_signature_argument_inserter(std::stringstream &s_, + const char *capture_string, + size_t last) + : s(s_) + { + n = 0; + m_last = last; + + size_t capture_string_length = std::strlen(capture_string); + BOOST_ASSERT(capture_string[0] == '(' && + capture_string[capture_string_length-1] == ')'); + std::string capture_string_(capture_string + 1, capture_string_length - 2); + boost::split(m_capture_names, capture_string_ , boost::is_any_of(",")); + } + + template<class T> + void operator()(const T&) const + { + BOOST_ASSERT(n < m_capture_names.size()); + + // get captured variable name + std::string variable_name = m_capture_names[n]; + + // remove leading and trailing whitespace from variable name + boost::trim(variable_name); + + s << capture_traits<T>::type_name() << " " << variable_name; + if(n+1 < m_last){ + s << ", "; + } + n++; + } + + mutable size_t n; + size_t m_last; + std::vector<std::string> m_capture_names; + std::stringstream &s; +}; + +template<class Signature, class CaptureTuple> +inline std::string +make_closure_declaration(const char *name, + const char *arguments, + const CaptureTuple &capture_tuple, + const char *capture_string) +{ + typedef typename + boost::function_traits<Signature>::result_type result_type; + typedef typename + boost::function_types::parameter_types<Signature>::type parameter_types; + typedef typename + 
mpl::size<parameter_types>::type arity_type; + + std::stringstream s; + s << "inline " << type_name<result_type>() << " " << name; + s << "("; + + // insert function arguments + signature_argument_inserter i(s, arguments, arity_type::value); + mpl::for_each< + typename mpl::transform<parameter_types, boost::add_pointer<mpl::_1> + >::type>(i); + s << ", "; + + // insert capture arguments + closure_signature_argument_inserter j( + s, capture_string, boost::tuples::length<CaptureTuple>::value + ); + fusion::for_each(capture_tuple, j); + + s << ")"; + return s.str(); +} + +// used by the BOOST_COMPUTE_CLOSURE() macro to create a closure +// function with the given signature, name, capture, and source. +template<class Signature, class CaptureTuple> +inline closure<Signature, CaptureTuple> +make_closure_impl(const char *name, + const char *arguments, + const CaptureTuple &capture, + const char *capture_string, + const std::string &source) +{ + std::stringstream s; + s << make_closure_declaration<Signature>(name, arguments, capture, capture_string); + s << source; + + return closure<Signature, CaptureTuple>(name, capture, s.str()); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +/// Creates a closure function object with \p name and \p source. +/// +/// \param return_type The return type for the function. +/// \param name The name of the function. +/// \param arguments A list of arguments for the function. +/// \param capture A list of variables to capture. +/// \param source The OpenCL C source code for the function. 
+/// +/// For example, to create a function which checks if a 2D point is +/// contained in a circle of a given radius: +/// \code +/// // radius variable declared in C++ +/// float radius = 1.5f; +/// +/// // create a closure function which returns true if the 2D point +/// // argument is contained within a circle of the given radius +/// BOOST_COMPUTE_CLOSURE(bool, is_in_circle, (const float2_ p), (radius), +/// { +/// return sqrt(p.x*p.x + p.y*p.y) < radius; +/// }); +/// +/// // vector of 2D points +/// boost::compute::vector<float2_> points = ... +/// +/// // count number of points in the circle +/// size_t count = boost::compute::count_if( +/// points.begin(), points.end(), is_in_circle, queue +/// ); +/// \endcode +/// +/// \see BOOST_COMPUTE_FUNCTION() +#ifdef BOOST_COMPUTE_DOXYGEN_INVOKED +#define BOOST_COMPUTE_CLOSURE(return_type, name, arguments, capture, source) +#else +#define BOOST_COMPUTE_CLOSURE(return_type, name, arguments, capture, ...) \ + ::boost::compute::closure< \ + return_type arguments, BOOST_TYPEOF(boost::tie capture) \ + > name = \ + ::boost::compute::detail::make_closure_impl< \ + return_type arguments \ + >( \ + #name, #arguments, boost::tie capture, #capture, #__VA_ARGS__ \ + ) +#endif + +#endif // BOOST_COMPUTE_CLOSURE_HPP diff --git a/boost/compute/command_queue.hpp b/boost/compute/command_queue.hpp new file mode 100644 index 0000000000..2a1328a959 --- /dev/null +++ b/boost/compute/command_queue.hpp @@ -0,0 +1,1881 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_COMMAND_QUEUE_HPP +#define BOOST_COMPUTE_COMMAND_QUEUE_HPP + +#include <cstddef> +#include <algorithm> + +#include <boost/assert.hpp> + +#include <boost/compute/config.hpp> +#include <boost/compute/event.hpp> +#include <boost/compute/buffer.hpp> +#include <boost/compute/device.hpp> +#include <boost/compute/kernel.hpp> +#include <boost/compute/context.hpp> +#include <boost/compute/exception.hpp> +#include <boost/compute/image/image1d.hpp> +#include <boost/compute/image/image2d.hpp> +#include <boost/compute/image/image3d.hpp> +#include <boost/compute/image/image_object.hpp> +#include <boost/compute/utility/wait_list.hpp> +#include <boost/compute/detail/get_object_info.hpp> +#include <boost/compute/detail/assert_cl_success.hpp> +#include <boost/compute/detail/diagnostic.hpp> +#include <boost/compute/utility/extents.hpp> + +namespace boost { +namespace compute { +namespace detail { + +inline void BOOST_COMPUTE_CL_CALLBACK +nullary_native_kernel_trampoline(void *user_func_ptr) +{ + void (*user_func)(); + std::memcpy(&user_func, user_func_ptr, sizeof(user_func)); + user_func(); +} + +} // end detail namespace + +/// \class command_queue +/// \brief A command queue. +/// +/// Command queues provide the interface for interacting with compute +/// devices. The command_queue class provides methods to copy data to +/// and from a compute device as well as execute compute kernels. +/// +/// Command queues are created for a compute device within a compute +/// context. 
+/// +/// For example, to create a context and command queue for the default device +/// on the system (this is the normal set up code used by almost all OpenCL +/// programs): +/// \code +/// #include <boost/compute/core.hpp> +/// +/// // get the default compute device +/// boost::compute::device device = boost::compute::system::default_device(); +/// +/// // set up a compute context and command queue +/// boost::compute::context context(device); +/// boost::compute::command_queue queue(context, device); +/// \endcode +/// +/// The default command queue for the system can be obtained with the +/// system::default_queue() method. +/// +/// \see buffer, context, kernel +class command_queue +{ +public: + enum properties { + enable_profiling = CL_QUEUE_PROFILING_ENABLE, + enable_out_of_order_execution = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE + }; + + enum map_flags { + map_read = CL_MAP_READ, + map_write = CL_MAP_WRITE + #ifdef CL_VERSION_1_2 + , + map_write_invalidate_region = CL_MAP_WRITE_INVALIDATE_REGION + #endif + }; + + /// Creates a null command queue. + command_queue() + : m_queue(0) + { + } + + explicit command_queue(cl_command_queue queue, bool retain = true) + : m_queue(queue) + { + if(m_queue && retain){ + clRetainCommandQueue(m_queue); + } + } + + /// Creates a command queue in \p context for \p device with + /// \p properties. + /// + /// \see_opencl_ref{clCreateCommandQueue} + command_queue(const context &context, + const device &device, + cl_command_queue_properties properties = 0) + { + BOOST_ASSERT(device.id() != 0); + + cl_int error = 0; + + #ifdef CL_VERSION_2_0 + if (device.check_version(2, 0)){ + std::vector<cl_queue_properties> queue_properties; + if(properties){ + queue_properties.push_back(CL_QUEUE_PROPERTIES); + queue_properties.push_back(cl_queue_properties(properties)); + queue_properties.push_back(cl_queue_properties(0)); + } + + const cl_queue_properties *queue_properties_ptr = + queue_properties.empty() ? 
0 : &queue_properties[0]; + + m_queue = clCreateCommandQueueWithProperties( + context, device.id(), queue_properties_ptr, &error + ); + } else + #endif + { + // Suppress deprecated declarations warning + BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS(); + m_queue = clCreateCommandQueue( + context, device.id(), properties, &error + ); + BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS(); + } + + if(!m_queue){ + BOOST_THROW_EXCEPTION(opencl_error(error)); + } + } + + /// Creates a new command queue object as a copy of \p other. + command_queue(const command_queue &other) + : m_queue(other.m_queue) + { + if(m_queue){ + clRetainCommandQueue(m_queue); + } + } + + /// Copies the command queue object from \p other to \c *this. + command_queue& operator=(const command_queue &other) + { + if(this != &other){ + if(m_queue){ + clReleaseCommandQueue(m_queue); + } + + m_queue = other.m_queue; + + if(m_queue){ + clRetainCommandQueue(m_queue); + } + } + + return *this; + } + + #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES + /// Move-constructs a new command queue object from \p other. + command_queue(command_queue&& other) BOOST_NOEXCEPT + : m_queue(other.m_queue) + { + other.m_queue = 0; + } + + /// Move-assigns the command queue from \p other to \c *this. + command_queue& operator=(command_queue&& other) BOOST_NOEXCEPT + { + if(m_queue){ + clReleaseCommandQueue(m_queue); + } + + m_queue = other.m_queue; + other.m_queue = 0; + + return *this; + } + #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES + + /// Destroys the command queue. + /// + /// \see_opencl_ref{clReleaseCommandQueue} + ~command_queue() + { + if(m_queue){ + BOOST_COMPUTE_ASSERT_CL_SUCCESS( + clReleaseCommandQueue(m_queue) + ); + } + } + + /// Returns the underlying OpenCL command queue. + cl_command_queue& get() const + { + return const_cast<cl_command_queue &>(m_queue); + } + + /// Returns the device that the command queue issues commands to. 
+ device get_device() const + { + return device(get_info<cl_device_id>(CL_QUEUE_DEVICE)); + } + + /// Returns the context for the command queue. + context get_context() const + { + return context(get_info<cl_context>(CL_QUEUE_CONTEXT)); + } + + /// Returns information about the command queue. + /// + /// \see_opencl_ref{clGetCommandQueueInfo} + template<class T> + T get_info(cl_command_queue_info info) const + { + return detail::get_object_info<T>(clGetCommandQueueInfo, m_queue, info); + } + + /// \overload + template<int Enum> + typename detail::get_object_info_type<command_queue, Enum>::type + get_info() const; + + /// Returns the properties for the command queue. + cl_command_queue_properties get_properties() const + { + return get_info<cl_command_queue_properties>(CL_QUEUE_PROPERTIES); + } + + /// Enqueues a command to read data from \p buffer to host memory. + /// + /// \see_opencl_ref{clEnqueueReadBuffer} + /// + /// \see copy() + event enqueue_read_buffer(const buffer &buffer, + size_t offset, + size_t size, + void *host_ptr, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + BOOST_ASSERT(size <= buffer.size()); + BOOST_ASSERT(buffer.get_context() == this->get_context()); + BOOST_ASSERT(host_ptr != 0); + + event event_; + + cl_int ret = clEnqueueReadBuffer( + m_queue, + buffer.get(), + CL_TRUE, + offset, + size, + host_ptr, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + + /// Enqueues a command to read data from \p buffer to host memory. The + /// copy will be performed asynchronously. 
+ /// + /// \see_opencl_ref{clEnqueueReadBuffer} + /// + /// \see copy_async() + event enqueue_read_buffer_async(const buffer &buffer, + size_t offset, + size_t size, + void *host_ptr, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + BOOST_ASSERT(size <= buffer.size()); + BOOST_ASSERT(buffer.get_context() == this->get_context()); + BOOST_ASSERT(host_ptr != 0); + + event event_; + + cl_int ret = clEnqueueReadBuffer( + m_queue, + buffer.get(), + CL_FALSE, + offset, + size, + host_ptr, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + + #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + /// Enqueues a command to read a rectangular region from \p buffer to + /// host memory. + /// + /// \see_opencl_ref{clEnqueueReadBufferRect} + /// + /// \opencl_version_warning{1,1} + event enqueue_read_buffer_rect(const buffer &buffer, + const size_t buffer_origin[3], + const size_t host_origin[3], + const size_t region[3], + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + void *host_ptr, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + BOOST_ASSERT(buffer.get_context() == this->get_context()); + BOOST_ASSERT(host_ptr != 0); + + event event_; + + cl_int ret = clEnqueueReadBufferRect( + m_queue, + buffer.get(), + CL_TRUE, + buffer_origin, + host_origin, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + host_ptr, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + + /// Enqueues a command to read a rectangular region from \p buffer to + /// host memory. The copy will be performed asynchronously. 
+ /// + /// \see_opencl_ref{clEnqueueReadBufferRect} + /// + /// \opencl_version_warning{1,1} + event enqueue_read_buffer_rect_async(const buffer &buffer, + const size_t buffer_origin[3], + const size_t host_origin[3], + const size_t region[3], + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + void *host_ptr, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + BOOST_ASSERT(buffer.get_context() == this->get_context()); + BOOST_ASSERT(host_ptr != 0); + + event event_; + + cl_int ret = clEnqueueReadBufferRect( + m_queue, + buffer.get(), + CL_FALSE, + buffer_origin, + host_origin, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + host_ptr, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + #endif // CL_VERSION_1_1 + + /// Enqueues a command to write data from host memory to \p buffer. + /// + /// \see_opencl_ref{clEnqueueWriteBuffer} + /// + /// \see copy() + event enqueue_write_buffer(const buffer &buffer, + size_t offset, + size_t size, + const void *host_ptr, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + BOOST_ASSERT(size <= buffer.size()); + BOOST_ASSERT(buffer.get_context() == this->get_context()); + BOOST_ASSERT(host_ptr != 0); + + event event_; + + cl_int ret = clEnqueueWriteBuffer( + m_queue, + buffer.get(), + CL_TRUE, + offset, + size, + host_ptr, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + + /// Enqueues a command to write data from host memory to \p buffer. + /// The copy is performed asynchronously. 
+ /// + /// \see_opencl_ref{clEnqueueWriteBuffer} + /// + /// \see copy_async() + event enqueue_write_buffer_async(const buffer &buffer, + size_t offset, + size_t size, + const void *host_ptr, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + BOOST_ASSERT(size <= buffer.size()); + BOOST_ASSERT(buffer.get_context() == this->get_context()); + BOOST_ASSERT(host_ptr != 0); + + event event_; + + cl_int ret = clEnqueueWriteBuffer( + m_queue, + buffer.get(), + CL_FALSE, + offset, + size, + host_ptr, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + + #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + /// Enqueues a command to write a rectangular region from host memory + /// to \p buffer. + /// + /// \see_opencl_ref{clEnqueueWriteBufferRect} + /// + /// \opencl_version_warning{1,1} + event enqueue_write_buffer_rect(const buffer &buffer, + const size_t buffer_origin[3], + const size_t host_origin[3], + const size_t region[3], + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + void *host_ptr, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + BOOST_ASSERT(buffer.get_context() == this->get_context()); + BOOST_ASSERT(host_ptr != 0); + + event event_; + + cl_int ret = clEnqueueWriteBufferRect( + m_queue, + buffer.get(), + CL_TRUE, + buffer_origin, + host_origin, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + host_ptr, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + + /// Enqueues a command to write a rectangular region from host memory + /// to \p buffer. The copy is performed asynchronously. 
+ /// + /// \see_opencl_ref{clEnqueueWriteBufferRect} + /// + /// \opencl_version_warning{1,1} + event enqueue_write_buffer_rect_async(const buffer &buffer, + const size_t buffer_origin[3], + const size_t host_origin[3], + const size_t region[3], + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + void *host_ptr, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + BOOST_ASSERT(buffer.get_context() == this->get_context()); + BOOST_ASSERT(host_ptr != 0); + + event event_; + + cl_int ret = clEnqueueWriteBufferRect( + m_queue, + buffer.get(), + CL_FALSE, + buffer_origin, + host_origin, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + host_ptr, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + #endif // CL_VERSION_1_1 + + /// Enqueues a command to copy data from \p src_buffer to + /// \p dst_buffer. 
+ /// + /// \see_opencl_ref{clEnqueueCopyBuffer} + /// + /// \see copy() + event enqueue_copy_buffer(const buffer &src_buffer, + const buffer &dst_buffer, + size_t src_offset, + size_t dst_offset, + size_t size, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + BOOST_ASSERT(src_offset + size <= src_buffer.size()); + BOOST_ASSERT(dst_offset + size <= dst_buffer.size()); + BOOST_ASSERT(src_buffer.get_context() == this->get_context()); + BOOST_ASSERT(dst_buffer.get_context() == this->get_context()); + + event event_; + + cl_int ret = clEnqueueCopyBuffer( + m_queue, + src_buffer.get(), + dst_buffer.get(), + src_offset, + dst_offset, + size, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + + #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + /// Enqueues a command to copy a rectangular region from + /// \p src_buffer to \p dst_buffer. 
+ /// + /// \see_opencl_ref{clEnqueueCopyBufferRect} + /// + /// \opencl_version_warning{1,1} + event enqueue_copy_buffer_rect(const buffer &src_buffer, + const buffer &dst_buffer, + const size_t src_origin[3], + const size_t dst_origin[3], + const size_t region[3], + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + BOOST_ASSERT(src_buffer.get_context() == this->get_context()); + BOOST_ASSERT(dst_buffer.get_context() == this->get_context()); + + event event_; + + cl_int ret = clEnqueueCopyBufferRect( + m_queue, + src_buffer.get(), + dst_buffer.get(), + src_origin, + dst_origin, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + #endif // CL_VERSION_1_1 + + #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + /// Enqueues a command to fill \p buffer with \p pattern. + /// + /// \see_opencl_ref{clEnqueueFillBuffer} + /// + /// \opencl_version_warning{1,2} + /// + /// \see fill() + event enqueue_fill_buffer(const buffer &buffer, + const void *pattern, + size_t pattern_size, + size_t offset, + size_t size, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + BOOST_ASSERT(offset + size <= buffer.size()); + BOOST_ASSERT(buffer.get_context() == this->get_context()); + + event event_; + + cl_int ret = clEnqueueFillBuffer( + m_queue, + buffer.get(), + pattern, + pattern_size, + offset, + size, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + #endif // CL_VERSION_1_2 + + /// Enqueues a command to map \p buffer into the host address space. 
+ /// Event associated with map operation is returned through + /// \p map_buffer_event parameter. + /// + /// \see_opencl_ref{clEnqueueMapBuffer} + void* enqueue_map_buffer(const buffer &buffer, + cl_map_flags flags, + size_t offset, + size_t size, + event &map_buffer_event, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + BOOST_ASSERT(offset + size <= buffer.size()); + BOOST_ASSERT(buffer.get_context() == this->get_context()); + + cl_int ret = 0; + void *pointer = clEnqueueMapBuffer( + m_queue, + buffer.get(), + CL_TRUE, + flags, + offset, + size, + events.size(), + events.get_event_ptr(), + &map_buffer_event.get(), + &ret + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return pointer; + } + + /// \overload + void* enqueue_map_buffer(const buffer &buffer, + cl_map_flags flags, + size_t offset, + size_t size, + const wait_list &events = wait_list()) + { + event event_; + return enqueue_map_buffer(buffer, flags, offset, size, event_, events); + } + + /// Enqueues a command to map \p buffer into the host address space. + /// Map operation is performed asynchronously. The pointer to the mapped + /// region cannot be used until the map operation has completed. + /// + /// Event associated with map operation is returned through + /// \p map_buffer_event parameter. 
+ /// + /// \see_opencl_ref{clEnqueueMapBuffer} + void* enqueue_map_buffer_async(const buffer &buffer, + cl_map_flags flags, + size_t offset, + size_t size, + event &map_buffer_event, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + BOOST_ASSERT(offset + size <= buffer.size()); + BOOST_ASSERT(buffer.get_context() == this->get_context()); + + cl_int ret = 0; + void *pointer = clEnqueueMapBuffer( + m_queue, + buffer.get(), + CL_FALSE, + flags, + offset, + size, + events.size(), + events.get_event_ptr(), + &map_buffer_event.get(), + &ret + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return pointer; + } + + /// Enqueues a command to unmap \p buffer from the host memory space. + /// + /// \see_opencl_ref{clEnqueueUnmapMemObject} + event enqueue_unmap_buffer(const buffer &buffer, + void *mapped_ptr, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(buffer.get_context() == this->get_context()); + + return enqueue_unmap_mem_object(buffer.get(), mapped_ptr, events); + } + + /// Enqueues a command to unmap \p mem from the host memory space. + /// + /// \see_opencl_ref{clEnqueueUnmapMemObject} + event enqueue_unmap_mem_object(cl_mem mem, + void *mapped_ptr, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + + event event_; + + cl_int ret = clEnqueueUnmapMemObject( + m_queue, + mem, + mapped_ptr, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + + /// Enqueues a command to read data from \p image to host memory. 
+ /// + /// \see_opencl_ref{clEnqueueReadImage} + event enqueue_read_image(const image_object& image, + const size_t *origin, + const size_t *region, + size_t row_pitch, + size_t slice_pitch, + void *host_ptr, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + + event event_; + + cl_int ret = clEnqueueReadImage( + m_queue, + image.get(), + CL_TRUE, + origin, + region, + row_pitch, + slice_pitch, + host_ptr, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + + /// \overload + template<size_t N> + event enqueue_read_image(const image_object& image, + const extents<N> origin, + const extents<N> region, + void *host_ptr, + size_t row_pitch = 0, + size_t slice_pitch = 0, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(image.get_context() == this->get_context()); + + size_t origin3[3] = { 0, 0, 0 }; + size_t region3[3] = { 1, 1, 1 }; + + std::copy(origin.data(), origin.data() + N, origin3); + std::copy(region.data(), region.data() + N, region3); + + return enqueue_read_image( + image, origin3, region3, row_pitch, slice_pitch, host_ptr, events + ); + } + + /// Enqueues a command to write data from host memory to \p image. 
+ /// + /// \see_opencl_ref{clEnqueueWriteImage} + event enqueue_write_image(image_object& image, + const size_t *origin, + const size_t *region, + const void *host_ptr, + size_t input_row_pitch = 0, + size_t input_slice_pitch = 0, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + + event event_; + + cl_int ret = clEnqueueWriteImage( + m_queue, + image.get(), + CL_TRUE, + origin, + region, + input_row_pitch, + input_slice_pitch, + host_ptr, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + + /// \overload + template<size_t N> + event enqueue_write_image(image_object& image, + const extents<N> origin, + const extents<N> region, + const void *host_ptr, + const size_t input_row_pitch = 0, + const size_t input_slice_pitch = 0, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(image.get_context() == this->get_context()); + + size_t origin3[3] = { 0, 0, 0 }; + size_t region3[3] = { 1, 1, 1 }; + + std::copy(origin.data(), origin.data() + N, origin3); + std::copy(region.data(), region.data() + N, region3); + + return enqueue_write_image( + image, origin3, region3, host_ptr, input_row_pitch, input_slice_pitch, events + ); + } + + /// Enqueues a command to map \p image into the host address space. + /// + /// Event associated with map operation is returned through + /// \p map_image_event parameter. 
+ /// + /// \see_opencl_ref{clEnqueueMapImage} + void* enqueue_map_image(const image_object &image, + cl_map_flags flags, + const size_t *origin, + const size_t *region, + size_t &output_row_pitch, + size_t &output_slice_pitch, + event &map_image_event, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + BOOST_ASSERT(image.get_context() == this->get_context()); + + cl_int ret = 0; + void *pointer = clEnqueueMapImage( + m_queue, + image.get(), + CL_TRUE, + flags, + origin, + region, + &output_row_pitch, + &output_slice_pitch, + events.size(), + events.get_event_ptr(), + &map_image_event.get(), + &ret + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return pointer; + } + + /// \overload + void* enqueue_map_image(const image_object &image, + cl_map_flags flags, + const size_t *origin, + const size_t *region, + size_t &output_row_pitch, + size_t &output_slice_pitch, + const wait_list &events = wait_list()) + { + event event_; + return enqueue_map_image( + image, flags, origin, region, + output_row_pitch, output_slice_pitch, event_, events + ); + } + + /// \overload + template<size_t N> + void* enqueue_map_image(image_object& image, + cl_map_flags flags, + const extents<N> origin, + const extents<N> region, + size_t &output_row_pitch, + size_t &output_slice_pitch, + event &map_image_event, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(image.get_context() == this->get_context()); + + size_t origin3[3] = { 0, 0, 0 }; + size_t region3[3] = { 1, 1, 1 }; + + std::copy(origin.data(), origin.data() + N, origin3); + std::copy(region.data(), region.data() + N, region3); + + return enqueue_map_image( + image, flags, origin3, region3, + output_row_pitch, output_slice_pitch, map_image_event, events + ); + } + + /// \overload + template<size_t N> + void* enqueue_map_image(image_object& image, + cl_map_flags flags, + const extents<N> origin, + const extents<N> region, + size_t &output_row_pitch, + size_t 
&output_slice_pitch, + const wait_list &events = wait_list()) + { + event event_; + return enqueue_map_image( + image, flags, origin, region, + output_row_pitch, output_slice_pitch, event_, events + ); + } + + /// Enqueues a command to map \p image into the host address space. + /// Map operation is performed asynchronously. The pointer to the mapped + /// region cannot be used until the map operation has completed. + /// + /// Event associated with map operation is returned through + /// \p map_image_event parameter. + /// + /// \see_opencl_ref{clEnqueueMapImage} + void* enqueue_map_image_async(const image_object &image, + cl_map_flags flags, + const size_t *origin, + const size_t *region, + size_t &output_row_pitch, + size_t &output_slice_pitch, + event &map_image_event, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + BOOST_ASSERT(image.get_context() == this->get_context()); + + cl_int ret = 0; + void *pointer = clEnqueueMapImage( + m_queue, + image.get(), + CL_FALSE, + flags, + origin, + region, + &output_row_pitch, + &output_slice_pitch, + events.size(), + events.get_event_ptr(), + &map_image_event.get(), + &ret + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return pointer; + } + + /// \overload + template<size_t N> + void* enqueue_map_image_async(image_object& image, + cl_map_flags flags, + const extents<N> origin, + const extents<N> region, + size_t &output_row_pitch, + size_t &output_slice_pitch, + event &map_image_event, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(image.get_context() == this->get_context()); + + size_t origin3[3] = { 0, 0, 0 }; + size_t region3[3] = { 1, 1, 1 }; + + std::copy(origin.data(), origin.data() + N, origin3); + std::copy(region.data(), region.data() + N, region3); + + return enqueue_map_image_async( + image, flags, origin3, region3, + output_row_pitch, output_slice_pitch, map_image_event, events + ); + } + + /// Enqueues a command to unmap \p image 
from the host memory space. + /// + /// \see_opencl_ref{clEnqueueUnmapMemObject} + event enqueue_unmap_image(const image_object &image, + void *mapped_ptr, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(image.get_context() == this->get_context()); + + return enqueue_unmap_mem_object(image.get(), mapped_ptr, events); + } + + /// Enqueues a command to copy data from \p src_image to \p dst_image. + /// + /// \see_opencl_ref{clEnqueueCopyImage} + event enqueue_copy_image(const image_object& src_image, + image_object& dst_image, + const size_t *src_origin, + const size_t *dst_origin, + const size_t *region, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + + event event_; + + cl_int ret = clEnqueueCopyImage( + m_queue, + src_image.get(), + dst_image.get(), + src_origin, + dst_origin, + region, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + + /// \overload + template<size_t N> + event enqueue_copy_image(const image_object& src_image, + image_object& dst_image, + const extents<N> src_origin, + const extents<N> dst_origin, + const extents<N> region, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(src_image.get_context() == this->get_context()); + BOOST_ASSERT(dst_image.get_context() == this->get_context()); + BOOST_ASSERT_MSG(src_image.format() == dst_image.format(), + "Source and destination image formats must match."); + + size_t src_origin3[3] = { 0, 0, 0 }; + size_t dst_origin3[3] = { 0, 0, 0 }; + size_t region3[3] = { 1, 1, 1 }; + + std::copy(src_origin.data(), src_origin.data() + N, src_origin3); + std::copy(dst_origin.data(), dst_origin.data() + N, dst_origin3); + std::copy(region.data(), region.data() + N, region3); + + return enqueue_copy_image( + src_image, dst_image, src_origin3, dst_origin3, region3, events + ); + } + + /// Enqueues a command to copy data from \p src_image to \p dst_buffer. 
+ /// + /// \see_opencl_ref{clEnqueueCopyImageToBuffer} + event enqueue_copy_image_to_buffer(const image_object& src_image, + memory_object& dst_buffer, + const size_t *src_origin, + const size_t *region, + size_t dst_offset, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + + event event_; + + cl_int ret = clEnqueueCopyImageToBuffer( + m_queue, + src_image.get(), + dst_buffer.get(), + src_origin, + region, + dst_offset, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + + /// Enqueues a command to copy data from \p src_buffer to \p dst_image. + /// + /// \see_opencl_ref{clEnqueueCopyBufferToImage} + event enqueue_copy_buffer_to_image(const memory_object& src_buffer, + image_object& dst_image, + size_t src_offset, + const size_t *dst_origin, + const size_t *region, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + + event event_; + + cl_int ret = clEnqueueCopyBufferToImage( + m_queue, + src_buffer.get(), + dst_image.get(), + src_offset, + dst_origin, + region, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + + #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + /// Enqueues a command to fill \p image with \p fill_color. 
+ /// + /// \see_opencl_ref{clEnqueueFillImage} + /// + /// \opencl_version_warning{1,2} + event enqueue_fill_image(image_object& image, + const void *fill_color, + const size_t *origin, + const size_t *region, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + + event event_; + + cl_int ret = clEnqueueFillImage( + m_queue, + image.get(), + fill_color, + origin, + region, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + + /// \overload + template<size_t N> + event enqueue_fill_image(image_object& image, + const void *fill_color, + const extents<N> origin, + const extents<N> region, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(image.get_context() == this->get_context()); + + size_t origin3[3] = { 0, 0, 0 }; + size_t region3[3] = { 1, 1, 1 }; + + std::copy(origin.data(), origin.data() + N, origin3); + std::copy(region.data(), region.data() + N, region3); + + return enqueue_fill_image( + image, fill_color, origin3, region3, events + ); + } + + /// Enqueues a command to migrate \p mem_objects. + /// + /// \see_opencl_ref{clEnqueueMigrateMemObjects} + /// + /// \opencl_version_warning{1,2} + event enqueue_migrate_memory_objects(uint_ num_mem_objects, + const cl_mem *mem_objects, + cl_mem_migration_flags flags, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + + event event_; + + cl_int ret = clEnqueueMigrateMemObjects( + m_queue, + num_mem_objects, + mem_objects, + flags, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + #endif // CL_VERSION_1_2 + + /// Enqueues a kernel for execution. 
+ /// + /// \see_opencl_ref{clEnqueueNDRangeKernel} + event enqueue_nd_range_kernel(const kernel &kernel, + size_t work_dim, + const size_t *global_work_offset, + const size_t *global_work_size, + const size_t *local_work_size, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + BOOST_ASSERT(kernel.get_context() == this->get_context()); + + event event_; + + cl_int ret = clEnqueueNDRangeKernel( + m_queue, + kernel, + static_cast<cl_uint>(work_dim), + global_work_offset, + global_work_size, + local_work_size, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + + /// \overload + template<size_t N> + event enqueue_nd_range_kernel(const kernel &kernel, + const extents<N> &global_work_offset, + const extents<N> &global_work_size, + const extents<N> &local_work_size, + const wait_list &events = wait_list()) + { + return enqueue_nd_range_kernel( + kernel, + N, + global_work_offset.data(), + global_work_size.data(), + local_work_size.data(), + events + ); + } + + /// Convenience method which calls enqueue_nd_range_kernel() with a + /// one-dimensional range. + event enqueue_1d_range_kernel(const kernel &kernel, + size_t global_work_offset, + size_t global_work_size, + size_t local_work_size, + const wait_list &events = wait_list()) + { + return enqueue_nd_range_kernel( + kernel, + 1, + &global_work_offset, + &global_work_size, + local_work_size ? &local_work_size : 0, + events + ); + } + + /// Enqueues a kernel to execute using a single work-item. + /// + /// \see_opencl_ref{clEnqueueTask} + event enqueue_task(const kernel &kernel, const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + BOOST_ASSERT(kernel.get_context() == this->get_context()); + + event event_; + + // clEnqueueTask() was deprecated in OpenCL 2.0. In that case we + // just forward to the equivalent clEnqueueNDRangeKernel() call. 
+ #ifdef CL_VERSION_2_0 + size_t one = 1; + cl_int ret = clEnqueueNDRangeKernel( + m_queue, kernel, 1, 0, &one, &one, + events.size(), events.get_event_ptr(), &event_.get() + ); + #else + cl_int ret = clEnqueueTask( + m_queue, kernel, events.size(), events.get_event_ptr(), &event_.get() + ); + #endif + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + + /// Enqueues a function to execute on the host. + event enqueue_native_kernel(void (BOOST_COMPUTE_CL_CALLBACK *user_func)(void *), + void *args, + size_t cb_args, + uint_ num_mem_objects, + const cl_mem *mem_list, + const void **args_mem_loc, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + + event event_; + cl_int ret = clEnqueueNativeKernel( + m_queue, + user_func, + args, + cb_args, + num_mem_objects, + mem_list, + args_mem_loc, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + + /// Convenience overload for enqueue_native_kernel() which enqueues a + /// native kernel on the host with a nullary function. + event enqueue_native_kernel(void (BOOST_COMPUTE_CL_CALLBACK *user_func)(void), + const wait_list &events = wait_list()) + { + return enqueue_native_kernel( + detail::nullary_native_kernel_trampoline, + reinterpret_cast<void *>(&user_func), + sizeof(user_func), + 0, + 0, + 0, + events + ); + } + + /// Flushes the command queue. + /// + /// \see_opencl_ref{clFlush} + void flush() + { + BOOST_ASSERT(m_queue != 0); + + clFlush(m_queue); + } + + /// Blocks until all outstanding commands in the queue have finished. + /// + /// \see_opencl_ref{clFinish} + void finish() + { + BOOST_ASSERT(m_queue != 0); + + clFinish(m_queue); + } + + /// Enqueues a barrier in the queue. 
+ void enqueue_barrier() + { + BOOST_ASSERT(m_queue != 0); + cl_int ret = CL_SUCCESS; + + #ifdef CL_VERSION_1_2 + if(get_device().check_version(1, 2)){ + ret = clEnqueueBarrierWithWaitList(m_queue, 0, 0, 0); + } else + #endif // CL_VERSION_1_2 + { + // Suppress deprecated declarations warning + BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS(); + ret = clEnqueueBarrier(m_queue); + BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS(); + } + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + } + + #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + /// Enqueues a barrier in the queue after \p events. + /// + /// \opencl_version_warning{1,2} + event enqueue_barrier(const wait_list &events) + { + BOOST_ASSERT(m_queue != 0); + + event event_; + cl_int ret = CL_SUCCESS; + + ret = clEnqueueBarrierWithWaitList( + m_queue, events.size(), events.get_event_ptr(), &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + #endif // CL_VERSION_1_2 + + /// Enqueues a marker in the queue and returns an event that can be + /// used to track its progress. + event enqueue_marker() + { + event event_; + cl_int ret = CL_SUCCESS; + + #ifdef CL_VERSION_1_2 + if(get_device().check_version(1, 2)){ + ret = clEnqueueMarkerWithWaitList(m_queue, 0, 0, &event_.get()); + } else + #endif + { + // Suppress deprecated declarations warning + BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS(); + ret = clEnqueueMarker(m_queue, &event_.get()); + BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS(); + } + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + + #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + /// Enqueues a marker after \p events in the queue and returns an + /// event that can be used to track its progress. 
+ /// + /// \opencl_version_warning{1,2} + event enqueue_marker(const wait_list &events) + { + event event_; + + cl_int ret = clEnqueueMarkerWithWaitList( + m_queue, events.size(), events.get_event_ptr(), &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + #endif // CL_VERSION_1_2 + + #if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + /// Enqueues a command to copy \p size bytes of data from \p src_ptr to + /// \p dst_ptr. + /// + /// \opencl_version_warning{2,0} + /// + /// \see_opencl2_ref{clEnqueueSVMMemcpy} + event enqueue_svm_memcpy(void *dst_ptr, + const void *src_ptr, + size_t size, + const wait_list &events = wait_list()) + { + event event_; + + cl_int ret = clEnqueueSVMMemcpy( + m_queue, + CL_TRUE, + dst_ptr, + src_ptr, + size, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + + /// Enqueues a command to copy \p size bytes of data from \p src_ptr to + /// \p dst_ptr. The operation is performed asynchronously. + /// + /// \opencl_version_warning{2,0} + /// + /// \see_opencl2_ref{clEnqueueSVMMemcpy} + event enqueue_svm_memcpy_async(void *dst_ptr, + const void *src_ptr, + size_t size, + const wait_list &events = wait_list()) + { + event event_; + + cl_int ret = clEnqueueSVMMemcpy( + m_queue, + CL_FALSE, + dst_ptr, + src_ptr, + size, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + + /// Enqueues a command to fill \p size bytes of data at \p svm_ptr with + /// \p pattern. 
+ /// + /// \opencl_version_warning{2,0} + /// + /// \see_opencl2_ref{clEnqueueSVMMemFill} + event enqueue_svm_fill(void *svm_ptr, + const void *pattern, + size_t pattern_size, + size_t size, + const wait_list &events = wait_list()) + + { + event event_; + + cl_int ret = clEnqueueSVMMemFill( + m_queue, + svm_ptr, + pattern, + pattern_size, + size, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + + /// Enqueues a command to free \p svm_ptr. + /// + /// \opencl_version_warning{2,0} + /// + /// \see_opencl2_ref{clEnqueueSVMFree} + /// + /// \see svm_free() + event enqueue_svm_free(void *svm_ptr, + const wait_list &events = wait_list()) + { + event event_; + + cl_int ret = clEnqueueSVMFree( + m_queue, + 1, + &svm_ptr, + 0, + 0, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + + /// Enqueues a command to map \p svm_ptr to the host memory space. + /// + /// \opencl_version_warning{2,0} + /// + /// \see_opencl2_ref{clEnqueueSVMMap} + event enqueue_svm_map(void *svm_ptr, + size_t size, + cl_map_flags flags, + const wait_list &events = wait_list()) + { + event event_; + + cl_int ret = clEnqueueSVMMap( + m_queue, + CL_TRUE, + flags, + svm_ptr, + size, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + + /// Enqueues a command to unmap \p svm_ptr from the host memory space. 
+ /// + /// \opencl_version_warning{2,0} + /// + /// \see_opencl2_ref{clEnqueueSVMUnmap} + event enqueue_svm_unmap(void *svm_ptr, + const wait_list &events = wait_list()) + { + event event_; + + cl_int ret = clEnqueueSVMUnmap( + m_queue, + svm_ptr, + events.size(), + events.get_event_ptr(), + &event_.get() + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; + } + #endif // CL_VERSION_2_0 + + /// Returns \c true if the command queue is the same at \p other. + bool operator==(const command_queue &other) const + { + return m_queue == other.m_queue; + } + + /// Returns \c true if the command queue is different from \p other. + bool operator!=(const command_queue &other) const + { + return m_queue != other.m_queue; + } + + /// \internal_ + operator cl_command_queue() const + { + return m_queue; + } + + /// \internal_ + bool check_device_version(int major, int minor) const + { + return get_device().check_version(major, minor); + } + +private: + cl_command_queue m_queue; +}; + +inline buffer buffer::clone(command_queue &queue) const +{ + buffer copy(get_context(), size(), get_memory_flags()); + queue.enqueue_copy_buffer(*this, copy, 0, 0, size()); + return copy; +} + +inline image1d image1d::clone(command_queue &queue) const +{ + image1d copy( + get_context(), width(), format(), get_memory_flags() + ); + + queue.enqueue_copy_image(*this, copy, origin(), copy.origin(), size()); + + return copy; +} + +inline image2d image2d::clone(command_queue &queue) const +{ + image2d copy( + get_context(), width(), height(), format(), get_memory_flags() + ); + + queue.enqueue_copy_image(*this, copy, origin(), copy.origin(), size()); + + return copy; +} + +inline image3d image3d::clone(command_queue &queue) const +{ + image3d copy( + get_context(), width(), height(), depth(), format(), get_memory_flags() + ); + + queue.enqueue_copy_image(*this, copy, origin(), copy.origin(), size()); + + return copy; +} + +/// \internal_ define get_info() 
specializations for command_queue +BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(command_queue, + ((cl_context, CL_QUEUE_CONTEXT)) + ((cl_device_id, CL_QUEUE_DEVICE)) + ((uint_, CL_QUEUE_REFERENCE_COUNT)) + ((cl_command_queue_properties, CL_QUEUE_PROPERTIES)) +) + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_COMMAND_QUEUE_HPP diff --git a/boost/compute/config.hpp b/boost/compute/config.hpp new file mode 100644 index 0000000000..77d0d7b9df --- /dev/null +++ b/boost/compute/config.hpp @@ -0,0 +1,70 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_CONFIG_HPP +#define BOOST_COMPUTE_CONFIG_HPP + +#include <boost/config.hpp> +#include <boost/version.hpp> +#include <boost/compute/cl.hpp> + +// check for minimum required boost version +#if BOOST_VERSION < 104800 +#error Boost.Compute requires Boost version 1.48 or later +#endif + +// the BOOST_COMPUTE_NO_VARIADIC_TEMPLATES macro is defined +// if the compiler does not *fully* support variadic templates +#if defined(BOOST_NO_CXX11_VARIADIC_TEMPLATES) || \ + defined(BOOST_NO_VARIADIC_TEMPLATES) || \ + (defined(__GNUC__) && !defined(__clang__) && \ + __GNUC__ == 4 && __GNUC_MINOR__ <= 6) + #define BOOST_COMPUTE_NO_VARIADIC_TEMPLATES +#endif // BOOST_NO_CXX11_VARIADIC_TEMPLATES + +// the BOOST_COMPUTE_NO_STD_TUPLE macro is defined if the +// compiler/stdlib does not support std::tuple +#if defined(BOOST_NO_CXX11_HDR_TUPLE) || \ + defined(BOOST_NO_0X_HDR_TUPLE) || \ + defined(BOOST_COMPUTE_NO_VARIADIC_TEMPLATES) + #define BOOST_COMPUTE_NO_STD_TUPLE +#endif // 
BOOST_NO_CXX11_HDR_TUPLE + +// defines BOOST_COMPUTE_CL_CALLBACK to the value of CL_CALLBACK +// if it is defined (it was added in OpenCL 1.1). this is used to +// annotate certain callback functions registered with OpenCL +#ifdef CL_CALLBACK +# define BOOST_COMPUTE_CL_CALLBACK CL_CALLBACK +#else +# define BOOST_COMPUTE_CL_CALLBACK +#endif + +// Maximum number of iterators acceptable for make_zip_iterator +#ifndef BOOST_COMPUTE_MAX_ARITY + // should be no more than max boost::tuple size (10 by default) +# define BOOST_COMPUTE_MAX_ARITY 10 +#endif + +#if !defined(BOOST_COMPUTE_DOXYGEN_INVOKED) && \ + (defined(BOOST_NO_CXX11_RVALUE_REFERENCES) || defined(BOOST_NO_RVALUE_REFERENCES)) +# define BOOST_COMPUTE_NO_RVALUE_REFERENCES +#endif // BOOST_NO_CXX11_RVALUE_REFERENCES + +#if defined(BOOST_NO_CXX11_HDR_INITIALIZER_LIST) || \ + defined(BOOST_NO_0X_HDR_INITIALIZER_LIST) +# define BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST +#endif // BOOST_NO_CXX11_HDR_INITIALIZER_LIST + +#if defined(BOOST_NO_CXX11_HDR_CHRONO) || \ + defined(BOOST_NO_0X_HDR_CHRONO) +# define BOOST_COMPUTE_NO_HDR_CHRONO +#endif // BOOST_NO_CXX11_HDR_CHRONO + +#endif // BOOST_COMPUTE_CONFIG_HPP diff --git a/boost/compute/container.hpp b/boost/compute/container.hpp new file mode 100644 index 0000000000..fc14f5fde2 --- /dev/null +++ b/boost/compute/container.hpp @@ -0,0 +1,27 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_CONTAINER_HPP +#define BOOST_COMPUTE_CONTAINER_HPP + +/// \file +/// +/// Meta-header to include all Boost.Compute container headers. 
+ +#include <boost/compute/container/array.hpp> +#include <boost/compute/container/basic_string.hpp> +#include <boost/compute/container/dynamic_bitset.hpp> +#include <boost/compute/container/flat_map.hpp> +#include <boost/compute/container/flat_set.hpp> +#include <boost/compute/container/mapped_view.hpp> +#include <boost/compute/container/string.hpp> +#include <boost/compute/container/vector.hpp> + +#endif // BOOST_COMPUTE_CONTAINER_HPP diff --git a/boost/compute/container/array.hpp b/boost/compute/container/array.hpp new file mode 100644 index 0000000000..919be6eeac --- /dev/null +++ b/boost/compute/container/array.hpp @@ -0,0 +1,281 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_CONTAINER_ARRAY_HPP +#define BOOST_COMPUTE_CONTAINER_ARRAY_HPP + +#include <cstddef> +#include <iterator> +#include <exception> + +#include <boost/array.hpp> +#include <boost/throw_exception.hpp> + +#include <boost/compute/buffer.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/algorithm/fill.hpp> +#include <boost/compute/algorithm/swap_ranges.hpp> +#include <boost/compute/iterator/buffer_iterator.hpp> +#include <boost/compute/type_traits/detail/capture_traits.hpp> +#include <boost/compute/detail/buffer_value.hpp> + +namespace boost { +namespace compute { + +/// \class array +/// \brief A fixed-size container. +/// +/// The array container is very similar to the \ref vector container except +/// its size is fixed at compile-time rather than being dynamically resizable +/// at run-time. 
+/// +/// For example, to create a fixed-size array with eight values on the device: +/// \code +/// boost::compute::array<int, 8> values(context); +/// \endcode +/// +/// The Boost.Compute \c array class provides a STL-like API and is modeled +/// after the \c std::array class from the C++ standard library. +/// +/// \see \ref vector "vector<T>" +template<class T, std::size_t N> +class array +{ +public: + typedef T value_type; + typedef std::size_t size_type; + typedef ptrdiff_t difference_type; + typedef detail::buffer_value<T> reference; + typedef const detail::buffer_value<T> const_reference; + typedef T* pointer; + typedef const T* const_pointer; + typedef buffer_iterator<T> iterator; + typedef buffer_iterator<T> const_iterator; + typedef std::reverse_iterator<iterator> reverse_iterator; + typedef std::reverse_iterator<const_iterator> const_reverse_iterator; + + enum { + static_size = N + }; + + explicit array(const context &context = system::default_context()) + : m_buffer(context, sizeof(T) * N) + { + } + + array(const array<T, N> &other) + : m_buffer(other.m_buffer.get_context(), sizeof(T) * N) + { + boost::compute::copy(other.begin(), other.end(), begin()); + } + + array(const boost::array<T, N> &array, + const context &context = system::default_context()) + : m_buffer(context, sizeof(T) * N) + { + boost::compute::copy(array.begin(), array.end(), begin()); + } + + array<T, N>& operator=(const array<T, N> &other) + { + if(this != &other){ + boost::compute::copy(other.begin(), other.end(), begin()); + } + + return *this; + } + + array<T, N>& operator=(const boost::array<T, N> &array) + { + boost::compute::copy(array.begin(), array.end(), begin()); + + return *this; + } + + ~array() + { + } + + iterator begin() + { + return buffer_iterator<T>(m_buffer, 0); + } + + const_iterator begin() const + { + return buffer_iterator<T>(m_buffer, 0); + } + + const_iterator cbegin() const + { + return begin(); + } + + iterator end() + { + return 
buffer_iterator<T>(m_buffer, N); + } + + const_iterator end() const + { + return buffer_iterator<T>(m_buffer, N); + } + + const_iterator cend() const + { + return end(); + } + + reverse_iterator rbegin() + { + return reverse_iterator(end() - 1); + } + + const_reverse_iterator rbegin() const + { + return reverse_iterator(end() - 1); + } + + const_reverse_iterator crbegin() const + { + return rbegin(); + } + + reverse_iterator rend() + { + return reverse_iterator(begin() - 1); + } + + const_reverse_iterator rend() const + { + return reverse_iterator(begin() - 1); + } + + const_reverse_iterator crend() const + { + return rend(); + } + + size_type size() const + { + return N; + } + + bool empty() const + { + return N == 0; + } + + size_type max_size() const + { + return N; + } + + reference operator[](size_type index) + { + return *(begin() + static_cast<difference_type>(index)); + } + + const_reference operator[](size_type index) const + { + return *(begin() + static_cast<difference_type>(index)); + } + + reference at(size_type index) + { + if(index >= N){ + BOOST_THROW_EXCEPTION(std::out_of_range("index out of range")); + } + + return operator[](index); + } + + const_reference at(size_type index) const + { + if(index >= N){ + BOOST_THROW_EXCEPTION(std::out_of_range("index out of range")); + } + + return operator[](index); + } + + reference front() + { + return *begin(); + } + + const_reference front() const + { + return *begin(); + } + + reference back() + { + return *(end() - static_cast<difference_type>(1)); + } + + const_reference back() const + { + return *(end() - static_cast<difference_type>(1)); + } + + void fill(const value_type &value) + { + ::boost::compute::fill(begin(), end(), value); + } + + void swap(array<T, N> &other) + { + ::boost::compute::swap_ranges(begin(), end(), other.begin()); + } + + const buffer& get_buffer() const + { + return m_buffer; + } + +private: + buffer m_buffer; +}; + +namespace detail { + +// set_kernel_arg specialization for 
array<T, N> +template<class T, std::size_t N> +struct set_kernel_arg<array<T, N> > +{ + void operator()(kernel &kernel_, size_t index, const array<T, N> &array) + { + kernel_.set_arg(index, array.get_buffer()); + } +}; + +// for capturing array<T, N> with BOOST_COMPUTE_CLOSURE() +template<class T, size_t N> +struct capture_traits<array<T, N> > +{ + static std::string type_name() + { + return std::string("__global ") + ::boost::compute::type_name<T>() + "*"; + } +}; + +// meta_kernel streaming operator for array<T, N> +template<class T, size_t N> +meta_kernel& operator<<(meta_kernel &k, const array<T, N> &array) +{ + return k << k.get_buffer_identifier<T>(array.get_buffer()); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_CONTAINER_ARRAY_HPP diff --git a/boost/compute/container/basic_string.hpp b/boost/compute/container/basic_string.hpp new file mode 100644 index 0000000000..c5a2c46aa5 --- /dev/null +++ b/boost/compute/container/basic_string.hpp @@ -0,0 +1,331 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_CONTAINER_BASIC_STRING_HPP +#define BOOST_COMPUTE_CONTAINER_BASIC_STRING_HPP + +#include <string> +#include <cstring> + +#include <boost/compute/cl.hpp> +#include <boost/compute/algorithm/find.hpp> +#include <boost/compute/algorithm/search.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <iosfwd> + +namespace boost { +namespace compute { + +/// \class basic_string +/// \brief A template for a dynamically-sized character sequence. +/// +/// The \c basic_string class provides a generic template for a dynamically- +/// sized character sequence. This is most commonly used through the \c string +/// typedef (for \c basic_string<char>). +/// +/// For example, to create a string on the device with its contents copied +/// from a C-string on the host: +/// \code +/// boost::compute::string str("hello, world!"); +/// \endcode +/// +/// \see \ref vector "vector<T>" +template<class CharT, class Traits = std::char_traits<CharT> > +class basic_string +{ +public: + typedef Traits traits_type; + typedef typename Traits::char_type value_type; + typedef size_t size_type; + static const size_type npos = size_type(-1); + typedef typename ::boost::compute::vector<CharT>::reference reference; + typedef typename ::boost::compute::vector<CharT>::const_reference const_reference; + typedef typename ::boost::compute::vector<CharT>::iterator iterator; + typedef typename ::boost::compute::vector<CharT>::const_iterator const_iterator; + typedef typename ::boost::compute::vector<CharT>::reverse_iterator reverse_iterator; + typedef typename ::boost::compute::vector<CharT>::const_reverse_iterator const_reverse_iterator; + + basic_string() + { + } + + basic_string(size_type count, CharT ch) + : m_data(count) + { + std::fill(m_data.begin(), m_data.end(), ch); + } + + basic_string(const basic_string 
&other, + size_type pos, + size_type count = npos) + : m_data(other.begin() + pos, + other.begin() + (std::min)(other.size(), count)) + { + } + + basic_string(const char *s, size_type count) + : m_data(s, s + count) + { + } + + basic_string(const char *s) + : m_data(s, s + std::strlen(s)) + { + } + + template<class InputIterator> + basic_string(InputIterator first, InputIterator last) + : m_data(first, last) + { + } + + basic_string(const basic_string<CharT, Traits> &other) + : m_data(other.m_data) + { + } + + basic_string<CharT, Traits>& operator=(const basic_string<CharT, Traits> &other) + { + if(this != &other){ + m_data = other.m_data; + } + + return *this; + } + + ~basic_string() + { + } + + reference at(size_type pos) + { + return m_data.at(pos); + } + + const_reference at(size_type pos) const + { + return m_data.at(pos); + } + + reference operator[](size_type pos) + { + return m_data[pos]; + } + + const_reference operator[](size_type pos) const + { + return m_data[pos]; + } + + reference front() + { + return m_data.front(); + } + + const_reference front() const + { + return m_data.front(); + } + + reference back() + { + return m_data.back(); + } + + const_reference back() const + { + return m_data.back(); + } + + iterator begin() + { + return m_data.begin(); + } + + const_iterator begin() const + { + return m_data.begin(); + } + + const_iterator cbegin() const + { + return m_data.cbegin(); + } + + iterator end() + { + return m_data.end(); + } + + const_iterator end() const + { + return m_data.end(); + } + + const_iterator cend() const + { + return m_data.cend(); + } + + reverse_iterator rbegin() + { + return m_data.rbegin(); + } + + const_reverse_iterator rbegin() const + { + return m_data.rbegin(); + } + + const_reverse_iterator crbegin() const + { + return m_data.crbegin(); + } + + reverse_iterator rend() + { + return m_data.rend(); + } + + const_reverse_iterator rend() const + { + return m_data.rend(); + } + + const_reverse_iterator crend() const + { + 
return m_data.crend(); + } + + bool empty() const + { + return m_data.empty(); + } + + size_type size() const + { + return m_data.size(); + } + + size_type length() const + { + return m_data.size(); + } + + size_type max_size() const + { + return m_data.max_size(); + } + + void reserve(size_type size) + { + m_data.reserve(size); + } + + size_type capacity() const + { + return m_data.capacity(); + } + + void shrink_to_fit() + { + m_data.shrink_to_fit(); + } + + void clear() + { + m_data.clear(); + } + + void swap(basic_string<CharT, Traits> &other) + { + if(this != &other) + { + ::boost::compute::vector<CharT> temp_data(other.m_data); + other.m_data = m_data; + m_data = temp_data; + } + } + + basic_string<CharT, Traits> substr(size_type pos = 0, + size_type count = npos) const + { + return basic_string<CharT, Traits>(*this, pos, count); + } + + /// Finds the first character \p ch + size_type find(CharT ch, size_type pos = 0) const + { + const_iterator iter = ::boost::compute::find(begin() + pos, end(), ch); + if(iter == end()){ + return npos; + } + else { + return static_cast<size_type>(std::distance(begin(), iter)); + } + } + + /// Finds the first substring equal to \p str + size_type find(basic_string& str, size_type pos = 0) const + { + const_iterator iter = ::boost::compute::search(begin() + pos, end(), + str.begin(), str.end()); + if(iter == end()){ + return npos; + } + else { + return static_cast<size_type>(std::distance(begin(), iter)); + } + } + + /// Finds the first substring equal to the character string + /// pointed to by \p s. + /// The length of the string is determined by the first null character. + /// + /// For example, the following code + /// \snippet test/test_string.cpp string_find + /// + /// will return 5 as position. 
+ size_type find(const char* s, size_type pos = 0) const + { + basic_string str(s); + const_iterator iter = ::boost::compute::search(begin() + pos, end(), + str.begin(), str.end()); + if(iter == end()){ + return npos; + } + else { + return static_cast<size_type>(std::distance(begin(), iter)); + } + } + +private: + ::boost::compute::vector<CharT> m_data; +}; + +template<class CharT, class Traits> +std::ostream& +operator<<(std::ostream& stream, + boost::compute::basic_string<CharT, Traits>const& outStr) +{ + command_queue queue = ::boost::compute::system::default_queue(); + boost::compute::copy(outStr.begin(), + outStr.end(), + std::ostream_iterator<CharT>(stream), + queue); + return stream; +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_CONTAINER_BASIC_STRING_HPP diff --git a/boost/compute/container/detail/scalar.hpp b/boost/compute/container/detail/scalar.hpp new file mode 100644 index 0000000000..7ecd86e540 --- /dev/null +++ b/boost/compute/container/detail/scalar.hpp @@ -0,0 +1,61 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_CONTAINER_DETAIL_SCALAR_HPP +#define BOOST_COMPUTE_CONTAINER_DETAIL_SCALAR_HPP + +#include <boost/compute/buffer.hpp> +#include <boost/compute/detail/read_write_single_value.hpp> + +namespace boost { +namespace compute { +namespace detail { + +// scalar<T> provides a trivial "container" that stores a +// single value in a memory buffer on a compute device +template<class T> +class scalar +{ +public: + typedef T value_type; + + scalar(const context &context) + : m_buffer(context, sizeof(T)) + { + } + + ~scalar() + { + } + + T read(command_queue &queue) const + { + return read_single_value<T>(m_buffer, 0, queue); + } + + void write(const T &value, command_queue &queue) + { + write_single_value<T>(value, m_buffer, 0, queue); + } + + const buffer& get_buffer() const + { + return m_buffer; + } + +private: + buffer m_buffer; +}; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_CONTAINER_DETAIL_SCALAR_HPP diff --git a/boost/compute/container/dynamic_bitset.hpp b/boost/compute/container/dynamic_bitset.hpp new file mode 100644 index 0000000000..7f41901d64 --- /dev/null +++ b/boost/compute/container/dynamic_bitset.hpp @@ -0,0 +1,237 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_CONTAINER_DYNAMIC_BITSET_HPP +#define BOOST_COMPUTE_CONTAINER_DYNAMIC_BITSET_HPP + +#include <boost/compute/lambda.hpp> +#include <boost/compute/algorithm/any_of.hpp> +#include <boost/compute/algorithm/fill.hpp> +#include <boost/compute/algorithm/transform_reduce.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/functional/integer.hpp> +#include <boost/compute/types/fundamental.hpp> + +namespace boost { +namespace compute { + +/// \class dynamic_bitset +/// \brief The dynamic_bitset class contains a resizable bit array. +/// +/// For example, to create a dynamic-bitset with space for 1000 bits on the +/// device: +/// \code +/// boost::compute::dynamic_bitset<> bits(1000, queue); +/// \endcode +/// +/// The Boost.Compute \c dynamic_bitset class provides a STL-like API and is +/// modeled after the \c boost::dynamic_bitset class from Boost. +/// +/// \see \ref vector "vector<T>" +template<class Block = ulong_, class Alloc = buffer_allocator<Block> > +class dynamic_bitset +{ +public: + typedef Block block_type; + typedef Alloc allocator_type; + typedef vector<Block, Alloc> container_type; + typedef typename container_type::size_type size_type; + + BOOST_STATIC_CONSTANT(size_type, bits_per_block = sizeof(block_type) * CHAR_BIT); + BOOST_STATIC_CONSTANT(size_type, npos = static_cast<size_type>(-1)); + + /// Creates a new dynamic bitset with storage for \p size bits. Initializes + /// all bits to zero. + dynamic_bitset(size_type size, command_queue &queue) + : m_bits(size / sizeof(block_type), queue.get_context()), + m_size(size) + { + // initialize all bits to zero + reset(queue); + } + + /// Creates a new dynamic bitset as a copy of \p other. + dynamic_bitset(const dynamic_bitset &other) + : m_bits(other.m_bits), + m_size(other.m_size) + { + } + + /// Copies the data from \p other to \c *this. 
+ dynamic_bitset& operator=(const dynamic_bitset &other) + { + if(this != &other){ + m_bits = other.m_bits; + m_size = other.m_size; + } + + return *this; + } + + /// Destroys the dynamic bitset. + ~dynamic_bitset() + { + } + + /// Returns the size of the dynamic bitset. + size_type size() const + { + return m_size; + } + + /// Returns the number of blocks to store the bits in the dynamic bitset. + size_type num_blocks() const + { + return m_bits.size(); + } + + /// Returns the maximum possible size for the dynamic bitset. + size_type max_size() const + { + return m_bits.max_size() * bits_per_block; + } + + /// Returns \c true if the dynamic bitset is empty (i.e. \c size() == \c 0). + bool empty() const + { + return size() == 0; + } + + /// Returns the number of set bits (i.e. '1') in the bitset. + size_type count(command_queue &queue) const + { + ulong_ count = 0; + transform_reduce( + m_bits.begin(), + m_bits.end(), + &count, + popcount<block_type>(), + plus<ulong_>(), + queue + ); + return static_cast<size_type>(count); + } + + /// Resizes the bitset to contain \p num_bits. If the new size is greater + /// than the current size the new bits are set to zero. + void resize(size_type num_bits, command_queue &queue) + { + // resize bits + const size_type current_block_count = m_bits.size(); + m_bits.resize(num_bits * bits_per_block, queue); + + // fill new block with zeros (if new blocks were added) + const size_type new_block_count = m_bits.size(); + if(new_block_count > current_block_count){ + fill_n( + m_bits.begin() + current_block_count, + new_block_count - current_block_count, + block_type(0), + queue + ); + } + + // store new size + m_size = num_bits; + } + + /// Sets the bit at position \p n to \c true. + void set(size_type n, command_queue &queue) + { + set(n, true, queue); + } + + /// Sets the bit at position \p n to \p value. 
+ void set(size_type n, bool value, command_queue &queue) + { + const size_type bit = n % bits_per_block; + const size_type block = n / bits_per_block; + + // load current block + block_type block_value; + copy_n(m_bits.begin() + block, 1, &block_value, queue); + + // update block value + if(value){ + block_value |= (size_type(1) << bit); + } + else { + block_value &= ~(size_type(1) << bit); + } + + // store new block + copy_n(&block_value, 1, m_bits.begin() + block, queue); + } + + /// Returns \c true if the bit at position \p n is set (i.e. '1'). + bool test(size_type n, command_queue &queue) + { + const size_type bit = n % (sizeof(block_type) * CHAR_BIT); + const size_type block = n / (sizeof(block_type) * CHAR_BIT); + + block_type block_value; + copy_n(m_bits.begin() + block, 1, &block_value, queue); + + return block_value & (size_type(1) << bit); + } + + /// Flips the value of the bit at position \p n. + void flip(size_type n, command_queue &queue) + { + set(n, !test(n, queue), queue); + } + + /// Returns \c true if any bit in the bitset is set (i.e. '1'). + bool any(command_queue &queue) const + { + return any_of( + m_bits.begin(), m_bits.end(), lambda::_1 != block_type(0), queue + ); + } + + /// Returns \c true if all of the bits in the bitset are set to zero. + bool none(command_queue &queue) const + { + return !any(queue); + } + + /// Sets all of the bits in the bitset to zero. + void reset(command_queue &queue) + { + fill(m_bits.begin(), m_bits.end(), block_type(0), queue); + } + + /// Sets the bit at position \p n to zero. + void reset(size_type n, command_queue &queue) + { + set(n, false, queue); + } + + /// Empties the bitset (e.g. \c resize(0)). + void clear() + { + m_bits.clear(); + } + + /// Returns the allocator used to allocate storage for the bitset. 
+ allocator_type get_allocator() const + { + return m_bits.get_allocator(); + } + +private: + container_type m_bits; + size_type m_size; +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_CONTAINER_DYNAMIC_BITSET_HPP diff --git a/boost/compute/container/flat_map.hpp b/boost/compute/container/flat_map.hpp new file mode 100644 index 0000000000..684c4da122 --- /dev/null +++ b/boost/compute/container/flat_map.hpp @@ -0,0 +1,406 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_CONTAINER_FLAT_MAP_HPP +#define BOOST_COMPUTE_CONTAINER_FLAT_MAP_HPP + +#include <cstddef> +#include <utility> +#include <exception> + +#include <boost/config.hpp> +#include <boost/throw_exception.hpp> + +#include <boost/compute/exception.hpp> +#include <boost/compute/algorithm/find.hpp> +#include <boost/compute/algorithm/lower_bound.hpp> +#include <boost/compute/algorithm/upper_bound.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/functional/get.hpp> +#include <boost/compute/iterator/transform_iterator.hpp> +#include <boost/compute/types/pair.hpp> +#include <boost/compute/detail/buffer_value.hpp> + +namespace boost { +namespace compute { + +template<class Key, class T> +class flat_map +{ +public: + typedef Key key_type; + typedef T mapped_type; + typedef typename ::boost::compute::vector<std::pair<Key, T> > vector_type; + typedef typename vector_type::value_type value_type; + typedef typename vector_type::size_type size_type; + typedef typename vector_type::difference_type difference_type; + typedef typename 
vector_type::reference reference; + typedef typename vector_type::const_reference const_reference; + typedef typename vector_type::pointer pointer; + typedef typename vector_type::const_pointer const_pointer; + typedef typename vector_type::iterator iterator; + typedef typename vector_type::const_iterator const_iterator; + typedef typename vector_type::reverse_iterator reverse_iterator; + typedef typename vector_type::const_reverse_iterator const_reverse_iterator; + + explicit flat_map(const context &context = system::default_context()) + : m_vector(context) + { + } + + flat_map(const flat_map<Key, T> &other) + : m_vector(other.m_vector) + { + } + + flat_map<Key, T>& operator=(const flat_map<Key, T> &other) + { + if(this != &other){ + m_vector = other.m_vector; + } + + return *this; + } + + ~flat_map() + { + } + + iterator begin() + { + return m_vector.begin(); + } + + const_iterator begin() const + { + return m_vector.begin(); + } + + const_iterator cbegin() const + { + return m_vector.cbegin(); + } + + iterator end() + { + return m_vector.end(); + } + + const_iterator end() const + { + return m_vector.end(); + } + + const_iterator cend() const + { + return m_vector.cend(); + } + + reverse_iterator rbegin() + { + return m_vector.rbegin(); + } + + const_reverse_iterator rbegin() const + { + return m_vector.rbegin(); + } + + const_reverse_iterator crbegin() const + { + return m_vector.crbegin(); + } + + reverse_iterator rend() + { + return m_vector.rend(); + } + + const_reverse_iterator rend() const + { + return m_vector.rend(); + } + + const_reverse_iterator crend() const + { + return m_vector.crend(); + } + + size_type size() const + { + return m_vector.size(); + } + + size_type max_size() const + { + return m_vector.max_size(); + } + + bool empty() const + { + return m_vector.empty(); + } + + size_type capacity() const + { + return m_vector.capacity(); + } + + void reserve(size_type size, command_queue &queue) + { + m_vector.reserve(size, queue); + } + + void 
reserve(size_type size) + { + command_queue queue = m_vector.default_queue(); + reserve(size, queue); + queue.finish(); + } + + void shrink_to_fit() + { + m_vector.shrink_to_fit(); + } + + void clear() + { + m_vector.clear(); + } + + std::pair<iterator, bool> + insert(const value_type &value, command_queue &queue) + { + iterator location = upper_bound(value.first, queue); + + if(location != begin()){ + value_type current_value; + ::boost::compute::copy_n(location - 1, 1, ¤t_value, queue); + if(value.first == current_value.first){ + return std::make_pair(location - 1, false); + } + } + + m_vector.insert(location, value); + return std::make_pair(location, true); + } + + std::pair<iterator, bool> insert(const value_type &value) + { + command_queue queue = m_vector.default_queue(); + std::pair<iterator, bool> result = insert(value, queue); + queue.finish(); + return result; + } + + iterator erase(const const_iterator &position, command_queue &queue) + { + return erase(position, position + 1, queue); + } + + iterator erase(const const_iterator &position) + { + command_queue queue = m_vector.default_queue(); + iterator iter = erase(position, queue); + queue.finish(); + return iter; + } + + iterator erase(const const_iterator &first, + const const_iterator &last, + command_queue &queue) + { + return m_vector.erase(first, last, queue); + } + + iterator erase(const const_iterator &first, const const_iterator &last) + { + command_queue queue = m_vector.default_queue(); + iterator iter = erase(first, last, queue); + queue.finish(); + return iter; + } + + size_type erase(const key_type &value, command_queue &queue) + { + iterator position = find(value, queue); + + if(position == end()){ + return 0; + } + else { + erase(position, queue); + return 1; + } + } + + iterator find(const key_type &value, command_queue &queue) + { + ::boost::compute::get<0> get_key; + + return ::boost::compute::find( + ::boost::compute::make_transform_iterator(begin(), get_key), + 
::boost::compute::make_transform_iterator(end(), get_key), + value, + queue + ).base(); + } + + iterator find(const key_type &value) + { + command_queue queue = m_vector.default_queue(); + iterator iter = find(value, queue); + queue.finish(); + return iter; + } + + const_iterator find(const key_type &value, command_queue &queue) const + { + ::boost::compute::get<0> get_key; + + return ::boost::compute::find( + ::boost::compute::make_transform_iterator(begin(), get_key), + ::boost::compute::make_transform_iterator(end(), get_key), + value, + queue + ).base(); + } + + const_iterator find(const key_type &value) const + { + command_queue queue = m_vector.default_queue(); + const_iterator iter = find(value, queue); + queue.finish(); + return iter; + } + + size_type count(const key_type &value, command_queue &queue) const + { + return find(value, queue) != end() ? 1 : 0; + } + + size_type count(const key_type &value) const + { + command_queue queue = m_vector.default_queue(); + size_type result = count(value, queue); + queue.finish(); + return result; + } + + iterator lower_bound(const key_type &value, command_queue &queue) + { + ::boost::compute::get<0> get_key; + + return ::boost::compute::lower_bound( + ::boost::compute::make_transform_iterator(begin(), get_key), + ::boost::compute::make_transform_iterator(end(), get_key), + value, + queue + ).base(); + } + + iterator lower_bound(const key_type &value) + { + command_queue queue = m_vector.default_queue(); + iterator iter = lower_bound(value, queue); + queue.finish(); + return iter; + } + + const_iterator lower_bound(const key_type &value, command_queue &queue) const + { + ::boost::compute::get<0> get_key; + + return ::boost::compute::lower_bound( + ::boost::compute::make_transform_iterator(begin(), get_key), + ::boost::compute::make_transform_iterator(end(), get_key), + value, + queue + ).base(); + } + + const_iterator lower_bound(const key_type &value) const + { + command_queue queue = m_vector.default_queue(); + 
const_iterator iter = lower_bound(value, queue); + queue.finish(); + return iter; + } + + iterator upper_bound(const key_type &value, command_queue &queue) + { + ::boost::compute::get<0> get_key; + + return ::boost::compute::upper_bound( + ::boost::compute::make_transform_iterator(begin(), get_key), + ::boost::compute::make_transform_iterator(end(), get_key), + value, + queue + ).base(); + } + + iterator upper_bound(const key_type &value) + { + command_queue queue = m_vector.default_queue(); + iterator iter = upper_bound(value, queue); + queue.finish(); + return iter; + } + + const_iterator upper_bound(const key_type &value, command_queue &queue) const + { + ::boost::compute::get<0> get_key; + + return ::boost::compute::upper_bound( + ::boost::compute::make_transform_iterator(begin(), get_key), + ::boost::compute::make_transform_iterator(end(), get_key), + value, + queue + ).base(); + } + + const_iterator upper_bound(const key_type &value) const + { + command_queue queue = m_vector.default_queue(); + const_iterator iter = upper_bound(value, queue); + queue.finish(); + return iter; + } + + const mapped_type at(const key_type &key) const + { + const_iterator iter = find(key); + if(iter == end()){ + BOOST_THROW_EXCEPTION(std::out_of_range("key not found")); + } + + return value_type(*iter).second; + } + + detail::buffer_value<mapped_type> operator[](const key_type &key) + { + iterator iter = find(key); + if(iter == end()){ + iter = insert(std::make_pair(key, mapped_type())).first; + } + + size_t index = iter.get_index() * sizeof(value_type) + sizeof(key_type); + + return detail::buffer_value<mapped_type>(m_vector.get_buffer(), index); + } + +private: + ::boost::compute::vector<std::pair<Key, T> > m_vector; +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_CONTAINER_FLAT_MAP_HPP diff --git a/boost/compute/container/flat_set.hpp b/boost/compute/container/flat_set.hpp new file mode 100644 index 0000000000..8826f78846 --- /dev/null +++ 
b/boost/compute/container/flat_set.hpp @@ -0,0 +1,339 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_CONTAINER_FLAT_SET_HPP +#define BOOST_COMPUTE_CONTAINER_FLAT_SET_HPP + +#include <cstddef> +#include <utility> + +#include <boost/compute/algorithm/find.hpp> +#include <boost/compute/algorithm/lower_bound.hpp> +#include <boost/compute/algorithm/upper_bound.hpp> +#include <boost/compute/container/vector.hpp> + +namespace boost { +namespace compute { + +template<class T> +class flat_set +{ +public: + typedef T key_type; + typedef typename vector<T>::value_type value_type; + typedef typename vector<T>::size_type size_type; + typedef typename vector<T>::difference_type difference_type; + typedef typename vector<T>::reference reference; + typedef typename vector<T>::const_reference const_reference; + typedef typename vector<T>::pointer pointer; + typedef typename vector<T>::const_pointer const_pointer; + typedef typename vector<T>::iterator iterator; + typedef typename vector<T>::const_iterator const_iterator; + typedef typename vector<T>::reverse_iterator reverse_iterator; + typedef typename vector<T>::const_reverse_iterator const_reverse_iterator; + + explicit flat_set(const context &context = system::default_context()) + : m_vector(context) + { + } + + flat_set(const flat_set<T> &other) + : m_vector(other.m_vector) + { + } + + flat_set<T>& operator=(const flat_set<T> &other) + { + if(this != &other){ + m_vector = other.m_vector; + } + + return *this; + } + + ~flat_set() + { + } + + iterator begin() + { + return m_vector.begin(); + } + + 
const_iterator begin() const + { + return m_vector.begin(); + } + + const_iterator cbegin() const + { + return m_vector.cbegin(); + } + + iterator end() + { + return m_vector.end(); + } + + const_iterator end() const + { + return m_vector.end(); + } + + const_iterator cend() const + { + return m_vector.cend(); + } + + reverse_iterator rbegin() + { + return m_vector.rbegin(); + } + + const_reverse_iterator rbegin() const + { + return m_vector.rbegin(); + } + + const_reverse_iterator crbegin() const + { + return m_vector.crbegin(); + } + + reverse_iterator rend() + { + return m_vector.rend(); + } + + const_reverse_iterator rend() const + { + return m_vector.rend(); + } + + const_reverse_iterator crend() const + { + return m_vector.crend(); + } + + size_type size() const + { + return m_vector.size(); + } + + size_type max_size() const + { + return m_vector.max_size(); + } + + bool empty() const + { + return m_vector.empty(); + } + + size_type capacity() const + { + return m_vector.capacity(); + } + + void reserve(size_type size, command_queue &queue) + { + m_vector.reserve(size, queue); + } + + void reserve(size_type size) + { + command_queue queue = m_vector.default_queue(); + reserve(size, queue); + queue.finish(); + } + + void shrink_to_fit() + { + m_vector.shrink_to_fit(); + } + + void clear() + { + m_vector.clear(); + } + + std::pair<iterator, bool> + insert(const value_type &value, command_queue &queue) + { + iterator location = upper_bound(value, queue); + + if(location != begin()){ + value_type current_value; + ::boost::compute::copy_n(location - 1, 1, ¤t_value, queue); + if(value == current_value){ + return std::make_pair(location - 1, false); + } + } + + m_vector.insert(location, value, queue); + return std::make_pair(location, true); + } + + std::pair<iterator, bool> insert(const value_type &value) + { + command_queue queue = m_vector.default_queue(); + std::pair<iterator, bool> result = insert(value, queue); + queue.finish(); + return result; + } + + 
iterator erase(const const_iterator &position, command_queue &queue) + { + return erase(position, position + 1, queue); + } + + iterator erase(const const_iterator &position) + { + command_queue queue = m_vector.default_queue(); + iterator iter = erase(position, queue); + queue.finish(); + return iter; + } + + iterator erase(const const_iterator &first, + const const_iterator &last, + command_queue &queue) + { + return m_vector.erase(first, last, queue); + } + + iterator erase(const const_iterator &first, const const_iterator &last) + { + command_queue queue = m_vector.default_queue(); + iterator iter = erase(first, last, queue); + queue.finish(); + return iter; + } + + size_type erase(const key_type &value, command_queue &queue) + { + iterator position = find(value, queue); + + if(position == end()){ + return 0; + } + else { + erase(position, queue); + return 1; + } + } + + size_type erase(const key_type &value) + { + command_queue queue = m_vector.default_queue(); + size_type result = erase(value, queue); + queue.finish(); + return result; + } + + iterator find(const key_type &value, command_queue &queue) + { + return ::boost::compute::find(begin(), end(), value, queue); + } + + iterator find(const key_type &value) + { + command_queue queue = m_vector.default_queue(); + iterator iter = find(value, queue); + queue.finish(); + return iter; + } + + const_iterator find(const key_type &value, command_queue &queue) const + { + return ::boost::compute::find(begin(), end(), value, queue); + } + + const_iterator find(const key_type &value) const + { + command_queue queue = m_vector.default_queue(); + const_iterator iter = find(value, queue); + queue.finish(); + return iter; + } + + size_type count(const key_type &value, command_queue &queue) const + { + return find(value, queue) != end() ? 
1 : 0; + } + + size_type count(const key_type &value) const + { + command_queue queue = m_vector.default_queue(); + size_type result = count(value, queue); + queue.finish(); + return result; + } + + iterator lower_bound(const key_type &value, command_queue &queue) + { + return ::boost::compute::lower_bound(begin(), end(), value, queue); + } + + iterator lower_bound(const key_type &value) + { + command_queue queue = m_vector.default_queue(); + iterator iter = lower_bound(value, queue); + queue.finish(); + return iter; + } + + const_iterator lower_bound(const key_type &value, command_queue &queue) const + { + return ::boost::compute::lower_bound(begin(), end(), value, queue); + } + + const_iterator lower_bound(const key_type &value) const + { + command_queue queue = m_vector.default_queue(); + const_iterator iter = lower_bound(value, queue); + queue.finish(); + return iter; + } + + iterator upper_bound(const key_type &value, command_queue &queue) + { + return ::boost::compute::upper_bound(begin(), end(), value, queue); + } + + iterator upper_bound(const key_type &value) + { + command_queue queue = m_vector.default_queue(); + iterator iter = upper_bound(value, queue); + queue.finish(); + return iter; + } + + const_iterator upper_bound(const key_type &value, command_queue &queue) const + { + return ::boost::compute::upper_bound(begin(), end(), value, queue); + } + + const_iterator upper_bound(const key_type &value) const + { + command_queue queue = m_vector.default_queue(); + const_iterator iter = upper_bound(value, queue); + queue.finish(); + return iter; + } + +private: + vector<T> m_vector; +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_CONTAINER_FLAT_SET_HPP diff --git a/boost/compute/container/mapped_view.hpp b/boost/compute/container/mapped_view.hpp new file mode 100644 index 0000000000..59b1e4e0a4 --- /dev/null +++ b/boost/compute/container/mapped_view.hpp @@ -0,0 +1,250 @@ 
+//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_CONTAINER_MAPPED_VIEW_HPP +#define BOOST_COMPUTE_CONTAINER_MAPPED_VIEW_HPP + +#include <cstddef> +#include <exception> + +#include <boost/config.hpp> +#include <boost/throw_exception.hpp> + +#include <boost/compute/buffer.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/context.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/iterator/buffer_iterator.hpp> + +namespace boost { +namespace compute { + +/// \class mapped_view +/// \brief A mapped view of host memory. +/// +/// The mapped_view class simplifies mapping host-memory to a compute +/// device. This allows for host-allocated memory to be used with the +/// Boost.Compute algorithms. +/// +/// The following example shows how to map a simple C-array containing +/// data on the host to the device and run the reduce() algorithm to +/// calculate the sum: +/// +/// \snippet test/test_mapped_view.cpp reduce +/// +/// \see buffer +template<class T> +class mapped_view +{ +public: + typedef T value_type; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef buffer_iterator<T> iterator; + typedef buffer_iterator<T> const_iterator; + + /// Creates a null mapped_view object. + mapped_view() + { + m_mapped_ptr = 0; + } + + /// Creates a mapped_view for \p host_ptr with \p n elements. After + /// constructing a mapped_view the data is available for use by a + /// compute device. Use the \p unmap() method to make the updated data + /// available to the host. 
+ mapped_view(T *host_ptr, + size_type n, + const context &context = system::default_context()) + : m_buffer(_make_mapped_buffer(host_ptr, n, context)) + { + m_mapped_ptr = 0; + } + + /// Creates a read-only mapped_view for \p host_ptr with \p n elements. + /// After constructing a mapped_view the data is available for use by a + /// compute device. Use the \p unmap() method to make the updated data + /// available to the host. + mapped_view(const T *host_ptr, + size_type n, + const context &context = system::default_context()) + : m_buffer(_make_mapped_buffer(host_ptr, n, context)) + { + m_mapped_ptr = 0; + } + + /// Creates a copy of \p other. + mapped_view(const mapped_view<T> &other) + : m_buffer(other.m_buffer) + { + m_mapped_ptr = 0; + } + + /// Copies the mapped buffer from \p other. + mapped_view<T>& operator=(const mapped_view<T> &other) + { + if(this != &other){ + m_buffer = other.m_buffer; + m_mapped_ptr = 0; + } + + return *this; + } + + /// Destroys the mapped_view object. + ~mapped_view() + { + } + + /// Returns an iterator to the first element in the mapped_view. + iterator begin() + { + return ::boost::compute::make_buffer_iterator<T>(m_buffer, 0); + } + + /// Returns a const_iterator to the first element in the mapped_view. + const_iterator begin() const + { + return ::boost::compute::make_buffer_iterator<T>(m_buffer, 0); + } + + /// Returns a const_iterator to the first element in the mapped_view. + const_iterator cbegin() const + { + return begin(); + } + + /// Returns an iterator to one past the last element in the mapped_view. + iterator end() + { + return ::boost::compute::make_buffer_iterator<T>(m_buffer, size()); + } + + /// Returns a const_iterator to one past the last element in the mapped_view. + const_iterator end() const + { + return ::boost::compute::make_buffer_iterator<T>(m_buffer, size()); + } + + /// Returns a const_iterator to one past the last element in the mapped_view. 
+ const_iterator cend() const + { + return end(); + } + + /// Returns the number of elements in the mapped_view. + size_type size() const + { + return m_buffer.size() / sizeof(T); + } + + /// Returns the host data pointer. + T* get_host_ptr() + { + return static_cast<T *>(m_buffer.get_info<void *>(CL_MEM_HOST_PTR)); + } + + /// Returns the host data pointer. + const T* get_host_ptr() const + { + return static_cast<T *>(m_buffer.get_info<void *>(CL_MEM_HOST_PTR)); + } + + /// Resizes the mapped_view to \p size elements. + void resize(size_type size) + { + T *old_ptr = get_host_ptr(); + + m_buffer = _make_mapped_buffer(old_ptr, size, m_buffer.get_context()); + } + + /// Returns \c true if the mapped_view is empty. + bool empty() const + { + return size() == 0; + } + + /// Returns the mapped buffer. + const buffer& get_buffer() const + { + return m_buffer; + } + + /// Maps the buffer into the host address space. + /// + /// \see_opencl_ref{clEnqueueMapBuffer} + void map(cl_map_flags flags, command_queue &queue) + { + BOOST_ASSERT(m_mapped_ptr == 0); + + m_mapped_ptr = queue.enqueue_map_buffer( + m_buffer, flags, 0, m_buffer.size() + ); + } + + /// Maps the buffer into the host address space for reading and writing. + /// + /// Equivalent to: + /// \code + /// map(CL_MAP_READ | CL_MAP_WRITE, queue); + /// \endcode + void map(command_queue &queue) + { + map(CL_MAP_READ | CL_MAP_WRITE, queue); + } + + /// Unmaps the buffer from the host address space. 
+ /// + /// \see_opencl_ref{clEnqueueUnmapMemObject} + void unmap(command_queue &queue) + { + BOOST_ASSERT(m_mapped_ptr != 0); + + queue.enqueue_unmap_buffer(m_buffer, m_mapped_ptr); + + m_mapped_ptr = 0; + } + +private: + /// \internal_ + static buffer _make_mapped_buffer(T *host_ptr, + size_t n, + const context &context) + { + return buffer( + context, + n * sizeof(T), + buffer::read_write | buffer::use_host_ptr, + host_ptr + ); + } + + /// \internal_ + static buffer _make_mapped_buffer(const T *host_ptr, + size_t n, + const context &context) + { + return buffer( + context, + n * sizeof(T), + buffer::read_only | buffer::use_host_ptr, + const_cast<void *>(static_cast<const void *>(host_ptr)) + ); + } + +private: + buffer m_buffer; + void *m_mapped_ptr; +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_CONTAINER_MAPPED_VIEW_HPP diff --git a/boost/compute/container/stack.hpp b/boost/compute/container/stack.hpp new file mode 100644 index 0000000000..dc86df459a --- /dev/null +++ b/boost/compute/container/stack.hpp @@ -0,0 +1,81 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_CONTAINER_STACK_HPP +#define BOOST_COMPUTE_CONTAINER_STACK_HPP + +#include <boost/compute/container/vector.hpp> + +namespace boost { +namespace compute { + +template<class T> +class stack +{ +public: + typedef vector<T> container_type; + typedef typename container_type::size_type size_type; + typedef typename container_type::value_type value_type; + + stack() + { + } + + stack(const stack<T> &other) + : m_vector(other.m_vector) + { + } + + stack<T>& operator=(const stack<T> &other) + { + if(this != &other){ + m_vector = other.m_vector; + } + + return *this; + } + + ~stack() + { + } + + bool empty() const + { + return m_vector.empty(); + } + + size_type size() const + { + return m_vector.size(); + } + + value_type top() const + { + return m_vector.back(); + } + + void push(const T &value) + { + m_vector.push_back(value); + } + + void pop() + { + m_vector.pop_back(); + } + +private: + container_type m_vector; +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_CONTAINER_STACK_HPP diff --git a/boost/compute/container/string.hpp b/boost/compute/container/string.hpp new file mode 100644 index 0000000000..a721ab5746 --- /dev/null +++ b/boost/compute/container/string.hpp @@ -0,0 +1,25 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_CONTAINER_STRING_HPP +#define BOOST_COMPUTE_CONTAINER_STRING_HPP + +#include <boost/compute/types/fundamental.hpp> +#include <boost/compute/container/basic_string.hpp> + +namespace boost { +namespace compute { + +typedef basic_string<char_> string; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_CONTAINER_STRING_HPP diff --git a/boost/compute/container/valarray.hpp b/boost/compute/container/valarray.hpp new file mode 100644 index 0000000000..8ac8e01753 --- /dev/null +++ b/boost/compute/container/valarray.hpp @@ -0,0 +1,499 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_CONTAINER_VALARRAY_HPP +#define BOOST_COMPUTE_CONTAINER_VALARRAY_HPP + +#include <cstddef> +#include <valarray> + +#include <boost/static_assert.hpp> +#include <boost/type_traits.hpp> + +#include <boost/compute/buffer.hpp> +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/algorithm/fill.hpp> +#include <boost/compute/algorithm/max_element.hpp> +#include <boost/compute/algorithm/min_element.hpp> +#include <boost/compute/algorithm/transform.hpp> +#include <boost/compute/algorithm/accumulate.hpp> +#include <boost/compute/detail/buffer_value.hpp> +#include <boost/compute/functional.hpp> +#include <boost/compute/functional/bind.hpp> +#include <boost/compute/iterator/buffer_iterator.hpp> +#include <boost/compute/type_traits.hpp> + +namespace boost { +namespace compute { + +template<class T> +class valarray +{ +public: + explicit valarray(const context &context = system::default_context()) + : m_buffer(context, 0) + { + } + + explicit valarray(size_t size, + const context &context = system::default_context()) + : m_buffer(context, size * sizeof(T)) + { + } + + valarray(const T &value, + size_t size, + const context &context = system::default_context()) + : m_buffer(context, size * sizeof(T)) + { + fill(begin(), end(), value); + } + + valarray(const T *values, + size_t size, + const context &context = system::default_context()) + : m_buffer(context, size * sizeof(T)) + { + copy(values, values + size, begin()); + } + + valarray(const valarray<T> &other) + : m_buffer(other.m_buffer.get_context(), other.size() * sizeof(T)) + { + } + + valarray(const std::valarray<T> &valarray, + const context &context = system::default_context()) + : m_buffer(context, valarray.size() * sizeof(T)) + { + copy(&valarray[0], &valarray[valarray.size()], begin()); + } + + valarray<T>& operator=(const valarray<T> &other) + { + if(this != &other){ + // change to other's OpenCL 
context + m_buffer = buffer(other.m_buffer.get_context(), other.size() * sizeof(T)); + copy(other.begin(), other.end(), begin()); + } + + return *this; + } + + valarray<T>& operator=(const std::valarray<T> &valarray) + { + m_buffer = buffer(m_buffer.get_context(), valarray.size() * sizeof(T)); + copy(&valarray[0], &valarray[valarray.size()], begin()); + + return *this; + } + + valarray<T>& operator*=(const T&); + + valarray<T>& operator/=(const T&); + + valarray<T>& operator%=(const T& val); + + valarray<T> operator+() const + { + // This operator can be used with any type. + valarray<T> result(size()); + copy(begin(), end(), result.begin()); + return result; + } + + valarray<T> operator-() const + { + BOOST_STATIC_ASSERT_MSG( + is_fundamental<T>::value, + "This operator can be used with all OpenCL built-in scalar" + " and vector types" + ); + valarray<T> result(size()); + BOOST_COMPUTE_FUNCTION(T, unary_minus, (T x), + { + return -x; + }); + transform(begin(), end(), result.begin(), unary_minus); + return result; + } + + valarray<T> operator~() const + { + BOOST_STATIC_ASSERT_MSG( + is_fundamental<T>::value && + !is_floating_point<typename scalar_type<T>::type>::value, + "This operator can be used with all OpenCL built-in scalar" + " and vector types except the built-in scalar and vector float types" + ); + valarray<T> result(size()); + BOOST_COMPUTE_FUNCTION(T, bitwise_not, (T x), + { + return ~x; + }); + transform(begin(), end(), result.begin(), bitwise_not); + return result; + } + + /// In OpenCL there cannot be memory buffer with bool type, for + /// this reason return type is valarray<char> instead of valarray<bool>. + /// 1 means true, 0 means false. 
+ valarray<char> operator!() const + { + BOOST_STATIC_ASSERT_MSG( + is_fundamental<T>::value, + "This operator can be used with all OpenCL built-in scalar" + " and vector types" + ); + valarray<char> result(size()); + BOOST_COMPUTE_FUNCTION(char, logical_not, (T x), + { + return !x; + }); + transform(begin(), end(), &result[0], logical_not); + return result; + } + + valarray<T>& operator+=(const T&); + + valarray<T>& operator-=(const T&); + + valarray<T>& operator^=(const T&); + + valarray<T>& operator&=(const T&); + + valarray<T>& operator|=(const T&); + + valarray<T>& operator<<=(const T&); + + valarray<T>& operator>>=(const T&); + + valarray<T>& operator*=(const valarray<T>&); + + valarray<T>& operator/=(const valarray<T>&); + + valarray<T>& operator%=(const valarray<T>&); + + valarray<T>& operator+=(const valarray<T>&); + + valarray<T>& operator-=(const valarray<T>&); + + valarray<T>& operator^=(const valarray<T>&); + + valarray<T>& operator&=(const valarray<T>&); + + valarray<T>& operator|=(const valarray<T>&); + + valarray<T>& operator<<=(const valarray<T>&); + + valarray<T>& operator>>=(const valarray<T>&); + + ~valarray() + { + + } + + size_t size() const + { + return m_buffer.size() / sizeof(T); + } + + void resize(size_t size, T value = T()) + { + m_buffer = buffer(m_buffer.get_context(), size * sizeof(T)); + fill(begin(), end(), value); + } + + detail::buffer_value<T> operator[](size_t index) + { + return *(begin() + static_cast<ptrdiff_t>(index)); + } + + const detail::buffer_value<T> operator[](size_t index) const + { + return *(begin() + static_cast<ptrdiff_t>(index)); + } + + T (min)() const + { + return *(boost::compute::min_element(begin(), end())); + } + + T (max)() const + { + return *(boost::compute::max_element(begin(), end())); + } + + T sum() const + { + return boost::compute::accumulate(begin(), end(), T(0)); + } + + template<class UnaryFunction> + valarray<T> apply(UnaryFunction function) const + { + valarray<T> result(size()); + 
transform(begin(), end(), result.begin(), function); + return result; + } + + const buffer& get_buffer() const + { + return m_buffer; + } + + +private: + buffer_iterator<T> begin() const + { + return buffer_iterator<T>(m_buffer, 0); + } + + buffer_iterator<T> end() const + { + return buffer_iterator<T>(m_buffer, size()); + } + +private: + buffer m_buffer; +}; + +/// \internal_ +#define BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT(op, op_name, assert) \ + template<class T> \ + inline valarray<T>& \ + valarray<T>::operator op##=(const T& val) \ + { \ + assert \ + transform(begin(), end(), begin(), \ + ::boost::compute::bind(op_name<T>(), placeholders::_1, val)); \ + return *this; \ + } \ + \ + template<class T> \ + inline valarray<T>& \ + valarray<T>::operator op##=(const valarray<T> &rhs) \ + { \ + assert \ + transform(begin(), end(), rhs.begin(), begin(), op_name<T>()); \ + return *this; \ + } + +/// \internal_ +#define BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_ANY(op, op_name) \ + BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT(op, op_name, \ + BOOST_STATIC_ASSERT_MSG( \ + is_fundamental<T>::value, \ + "This operator can be used with all OpenCL built-in scalar" \ + " and vector types" \ + ); \ + ) + +/// \internal_ +/// For some operators class T can't be floating point type. +/// See OpenCL specification, operators chapter. 
/// Forwards to BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT with a
/// static assertion that requires a fundamental element type whose scalar
/// type is not floating point.
#define BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(op, op_name) \
    BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT(op, op_name, \
        BOOST_STATIC_ASSERT_MSG( \
            is_fundamental<T>::value && \
                !is_floating_point<typename scalar_type<T>::type>::value, \
            "This operator can be used with all OpenCL built-in scalar" \
            " and vector types except the built-in scalar and vector float types" \
        ); \
    )

// defining operators
// Arithmetic operators use the _ANY variant; bitwise and shift operators use
// the _NO_FP variant, which rejects floating-point element types.
BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_ANY(+, plus)
BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_ANY(-, minus)
BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_ANY(*, multiplies)
BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_ANY(/, divides)
BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(^, bit_xor)
BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(&, bit_and)
BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(|, bit_or)
BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(<<, shift_left)
BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP(>>, shift_right)

// The remainder (%) operates on
// integer scalar and integer vector data types only.
// See OpenCL specification.
BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT(%, modulus,
    BOOST_STATIC_ASSERT_MSG(
        is_integral<typename scalar_type<T>::type>::value,
        "This operator can be used only with OpenCL built-in integer types"
    );
)

// The helper macros are only needed for the instantiations above.
#undef BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_ANY
#undef BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT_NO_FP

#undef BOOST_COMPUTE_DEFINE_VALARRAY_COMPOUND_ASSIGNMENT

/// \internal_
/// Macro for defining binary operators for valarray
///
/// Expands to three free function templates for \p op: valarray-valarray,
/// scalar-valarray and valarray-scalar. Each creates a result array sized
/// like its valarray operand (the left one when both are valarrays) and
/// fills it with transform(). The scalar forms bind the scalar to the matching
/// side of \p op_name. \p assert is a statement emitted at the top of each
/// body.
///
/// NOTE(review): the valarray-valarray form reads lhs.size() elements from
/// rhs without comparing the two sizes - confirm callers guarantee equal
/// lengths.
#define BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR(op, op_name, assert) \
    template<class T> \
    valarray<T> operator op (const valarray<T>& lhs, const valarray<T>& rhs) \
    { \
        assert \
        valarray<T> result(lhs.size()); \
        transform(buffer_iterator<T>(lhs.get_buffer(), 0), \
                  buffer_iterator<T>(lhs.get_buffer(), lhs.size()), \
                  buffer_iterator<T>(rhs.get_buffer(), 0), \
                  buffer_iterator<T>(result.get_buffer(), 0), \
                  op_name<T>()); \
        return result; \
    } \
    \
    template<class T> \
    valarray<T> operator op (const T& val, const valarray<T>& rhs) \
    { \
        assert \
        valarray<T> result(rhs.size()); \
        transform(buffer_iterator<T>(rhs.get_buffer(), 0), \
                  buffer_iterator<T>(rhs.get_buffer(), rhs.size()), \
                  buffer_iterator<T>(result.get_buffer(), 0), \
                  ::boost::compute::bind(op_name<T>(), val, placeholders::_1)); \
        return result; \
    } \
    \
    template<class T> \
    valarray<T> operator op (const valarray<T>& lhs, const T& val) \
    { \
        assert \
        valarray<T> result(lhs.size()); \
        transform(buffer_iterator<T>(lhs.get_buffer(), 0), \
                  buffer_iterator<T>(lhs.get_buffer(), lhs.size()), \
                  buffer_iterator<T>(result.get_buffer(), 0), \
                  ::boost::compute::bind(op_name<T>(), placeholders::_1, val)); \
        return result; \
    }

/// \internal_
/// Binary operator usable with any fundamental element type.
#define BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_ANY(op, op_name) \
    BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR(op, op_name, \
        BOOST_STATIC_ASSERT_MSG( \
            is_fundamental<T>::value, \
            "This operator can be used with all OpenCL built-in scalar" \
            " and vector types" \
        ); \
    )

/// \internal_
/// For some operators class T can't be floating point type.
/// See OpenCL specification, operators chapter.
#define BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP(op, op_name) \
    BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR(op, op_name, \
        BOOST_STATIC_ASSERT_MSG( \
            is_fundamental<T>::value && \
                !is_floating_point<typename scalar_type<T>::type>::value, \
            "This operator can be used with all OpenCL built-in scalar" \
            " and vector types except the built-in scalar and vector float types" \
        ); \
    )

// defining binary operators for valarray
// (no binary % is instantiated here; only the compound %= exists)
BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_ANY(+, plus)
BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_ANY(-, minus)
BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_ANY(*, multiplies)
BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_ANY(/, divides)
BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP(^, bit_xor)
BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP(&, bit_and)
BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP(|, bit_or)
BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP(<<, shift_left)
BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP(>>, shift_right)

#undef BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_ANY
#undef BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR_NO_FP

#undef BOOST_COMPUTE_DEFINE_VALARRAY_BINARY_OPERATOR

/// \internal_
/// Macro for defining valarray comparison operators.
/// For return type valarray<char> is used instead of valarray<bool> because
/// in OpenCL there cannot be memory buffer with bool type.
///
/// Note it's also used for defining binary logical operators (&&, ||)
///
/// Expands to three free function templates for \p op: valarray-valarray,
/// scalar-valarray and valarray-scalar. Each writes its result into a
/// valarray<char> sized like the valarray operand (the left one when both
/// are valarrays).
#define BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(op, op_name) \
    template<class T> \
    valarray<char> operator op (const valarray<T>& lhs, const valarray<T>& rhs) \
    { \
        BOOST_STATIC_ASSERT_MSG( \
            is_fundamental<T>::value, \
            "This operator can be used with all OpenCL built-in scalar" \
            " and vector types" \
        ); \
        valarray<char> result(lhs.size()); \
        transform(buffer_iterator<T>(lhs.get_buffer(), 0), \
                  buffer_iterator<T>(lhs.get_buffer(), lhs.size()), \
                  buffer_iterator<T>(rhs.get_buffer(), 0), \
                  buffer_iterator<char>(result.get_buffer(), 0), \
                  op_name<T>()); \
        return result; \
    } \
    \
    template<class T> \
    valarray<char> operator op (const T& val, const valarray<T>& rhs) \
    { \
        BOOST_STATIC_ASSERT_MSG( \
            is_fundamental<T>::value, \
            "This operator can be used with all OpenCL built-in scalar" \
            " and vector types" \
        ); \
        valarray<char> result(rhs.size()); \
        transform(buffer_iterator<T>(rhs.get_buffer(), 0), \
                  buffer_iterator<T>(rhs.get_buffer(), rhs.size()), \
                  buffer_iterator<char>(result.get_buffer(), 0), \
                  ::boost::compute::bind(op_name<T>(), val, placeholders::_1)); \
        return result; \
    } \
    \
    template<class T> \
    valarray<char> operator op (const valarray<T>& lhs, const T& val) \
    { \
        BOOST_STATIC_ASSERT_MSG( \
            is_fundamental<T>::value, \
            "This operator can be used with all OpenCL built-in scalar" \
            " and vector types" \
        ); \
        valarray<char> result(lhs.size()); \
        transform(buffer_iterator<T>(lhs.get_buffer(), 0), \
                  buffer_iterator<T>(lhs.get_buffer(), lhs.size()), \
                  buffer_iterator<char>(result.get_buffer(), 0), \
                  ::boost::compute::bind(op_name<T>(), placeholders::_1, val)); \
        return result; \
    }

BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(==, equal_to)
BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(!=, not_equal_to)
BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(>, greater)
BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(<, less)
BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(>=, greater_equal)
BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(<=, less_equal)

/// \internal_
/// Macro for defining binary logical operators for valarray.
/// It is a plain alias for the comparison-operator macro, so the generated
/// overloads have the same shape.
///
/// For return type valarray<char> is used instead of valarray<bool> because
/// in OpenCL there cannot be memory buffer with bool type.
/// 1 means true, 0 means false.
#define BOOST_COMPUTE_DEFINE_VALARRAY_LOGICAL_OPERATOR(op, op_name) \
    BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR(op, op_name)

BOOST_COMPUTE_DEFINE_VALARRAY_LOGICAL_OPERATOR(&&, logical_and)
BOOST_COMPUTE_DEFINE_VALARRAY_LOGICAL_OPERATOR(||, logical_or)

#undef BOOST_COMPUTE_DEFINE_VALARRAY_LOGICAL_OPERATOR

#undef BOOST_COMPUTE_DEFINE_VALARRAY_COMPARISON_OPERATOR

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_CONTAINER_VALARRAY_HPP
diff --git a/boost/compute/container/vector.hpp b/boost/compute/container/vector.hpp new file mode 100644 index 0000000000..47d649ad99 --- /dev/null +++ b/boost/compute/container/vector.hpp @@ -0,0 +1,761 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_CONTAINER_VECTOR_HPP +#define BOOST_COMPUTE_CONTAINER_VECTOR_HPP + +#include <vector> +#include <cstddef> +#include <iterator> +#include <exception> + +#include <boost/throw_exception.hpp> + +#include <boost/compute/config.hpp> + +#ifndef BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST +#include <initializer_list> +#endif + +#include <boost/compute/buffer.hpp> +#include <boost/compute/device.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/context.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/algorithm/copy_n.hpp> +#include <boost/compute/algorithm/fill_n.hpp> +#include <boost/compute/allocator/buffer_allocator.hpp> +#include <boost/compute/iterator/buffer_iterator.hpp> +#include <boost/compute/type_traits/detail/capture_traits.hpp> +#include <boost/compute/detail/buffer_value.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> + +namespace boost { +namespace compute { + +/// \class vector +/// \brief A resizable array of values. +/// +/// The vector<T> class stores a dynamic array of values. Internally, the data +/// is stored in an OpenCL buffer object. +/// +/// The vector class is the prefered container for storing and accessing data +/// on a compute device. In most cases it should be used instead of directly +/// dealing with buffer objects. If the undelying buffer is needed, it can be +/// accessed with the get_buffer() method. +/// +/// The internal storage is allocated in a specific OpenCL context which is +/// passed as an argument to the constructor when the vector is created. 
+/// +/// For example, to create a vector on the device containing space for ten +/// \c int values: +/// \code +/// boost::compute::vector<int> vec(10, context); +/// \endcode +/// +/// Allocation and data transfer can also be performed in a single step: +/// \code +/// // values on the host +/// int data[] = { 1, 2, 3, 4 }; +/// +/// // create a vector of size four and copy the values from data +/// boost::compute::vector<int> vec(data, data + 4, queue); +/// \endcode +/// +/// The Boost.Compute \c vector class provides a STL-like API and is modeled +/// after the \c std::vector class from the C++ standard library. It can be +/// used with any of the STL-like algorithms provided by Boost.Compute +/// including \c copy(), \c transform(), and \c sort() (among many others). +/// +/// For example: +/// \code +/// // a vector on a compute device +/// boost::compute::vector<float> vec = ... +/// +/// // copy data to the vector from a host std:vector +/// boost::compute::copy(host_vec.begin(), host_vec.end(), vec.begin(), queue); +/// +/// // copy data from the vector to a host std::vector +/// boost::compute::copy(vec.begin(), vec.end(), host_vec.begin(), queue); +/// +/// // sort the values in the vector +/// boost::compute::sort(vec.begin(), vec.end(), queue); +/// +/// // calculate the sum of the values in the vector (also see reduce()) +/// float sum = boost::compute::accumulate(vec.begin(), vec.end(), 0, queue); +/// +/// // reverse the values in the vector +/// boost::compute::reverse(vec.begin(), vec.end(), queue); +/// +/// // fill the vector with ones +/// boost::compute::fill(vec.begin(), vec.end(), 1, queue); +/// \endcode +/// +/// \see \ref array "array<T, N>", buffer +template<class T, class Alloc = buffer_allocator<T> > +class vector +{ +public: + typedef T value_type; + typedef Alloc allocator_type; + typedef typename allocator_type::size_type size_type; + typedef typename allocator_type::difference_type difference_type; + typedef 
detail::buffer_value<T> reference; + typedef const detail::buffer_value<T> const_reference; + typedef typename allocator_type::pointer pointer; + typedef typename allocator_type::const_pointer const_pointer; + typedef buffer_iterator<T> iterator; + typedef buffer_iterator<T> const_iterator; + typedef std::reverse_iterator<iterator> reverse_iterator; + typedef std::reverse_iterator<const_iterator> const_reverse_iterator; + + /// Creates an empty vector in \p context. + explicit vector(const context &context = system::default_context()) + : m_size(0), + m_allocator(context) + { + m_data = m_allocator.allocate(_minimum_capacity()); + } + + /// Creates a vector with space for \p count elements in \p context. + /// + /// Note that unlike \c std::vector's constructor, this will not initialize + /// the values in the container. Either call the vector constructor which + /// takes a value to initialize with or use the fill() algorithm to set + /// the initial values. + /// + /// For example: + /// \code + /// // create a vector on the device with space for ten ints + /// boost::compute::vector<int> vec(10, context); + /// \endcode + explicit vector(size_type count, + const context &context = system::default_context()) + : m_size(count), + m_allocator(context) + { + m_data = m_allocator.allocate((std::max)(count, _minimum_capacity())); + } + + /// Creates a vector with space for \p count elements and sets each equal + /// to \p value. + /// + /// For example: + /// \code + /// // creates a vector with four values set to nine (e.g. [9, 9, 9, 9]). 
+ /// boost::compute::vector<int> vec(4, 9, queue); + /// \endcode + vector(size_type count, + const T &value, + command_queue &queue = system::default_queue()) + : m_size(count), + m_allocator(queue.get_context()) + { + m_data = m_allocator.allocate((std::max)(count, _minimum_capacity())); + + ::boost::compute::fill_n(begin(), count, value, queue); + } + + /// Creates a vector with space for the values in the range [\p first, + /// \p last) and copies them into the vector with \p queue. + /// + /// For example: + /// \code + /// // values on the host + /// int data[] = { 1, 2, 3, 4 }; + /// + /// // create a vector of size four and copy the values from data + /// boost::compute::vector<int> vec(data, data + 4, queue); + /// \endcode + template<class InputIterator> + vector(InputIterator first, + InputIterator last, + command_queue &queue = system::default_queue()) + : m_size(detail::iterator_range_size(first, last)), + m_allocator(queue.get_context()) + { + m_data = m_allocator.allocate((std::max)(m_size, _minimum_capacity())); + + ::boost::compute::copy(first, last, begin(), queue); + } + + /// Creates a new vector and copies the values from \p other. + vector(const vector &other, + command_queue &queue = system::default_queue()) + : m_size(other.m_size), + m_allocator(other.m_allocator) + { + m_data = m_allocator.allocate((std::max)(m_size, _minimum_capacity())); + + if(!other.empty()){ + ::boost::compute::copy(other.begin(), other.end(), begin(), queue); + queue.finish(); + } + } + + /// Creates a new vector and copies the values from \p other. 
+ template<class OtherAlloc> + vector(const vector<T, OtherAlloc> &other, + command_queue &queue = system::default_queue()) + : m_size(other.size()), + m_allocator(queue.get_context()) + { + m_data = m_allocator.allocate((std::max)(m_size, _minimum_capacity())); + + if(!other.empty()){ + ::boost::compute::copy(other.begin(), other.end(), begin(), queue); + queue.finish(); + } + } + + /// Creates a new vector and copies the values from \p vector. + template<class OtherAlloc> + vector(const std::vector<T, OtherAlloc> &vector, + command_queue &queue = system::default_queue()) + : m_size(vector.size()), + m_allocator(queue.get_context()) + { + m_data = m_allocator.allocate((std::max)(m_size, _minimum_capacity())); + + ::boost::compute::copy(vector.begin(), vector.end(), begin(), queue); + } + + #ifndef BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST + vector(std::initializer_list<T> list, + command_queue &queue = system::default_queue()) + : m_size(list.size()), + m_allocator(queue.get_context()) + { + m_data = m_allocator.allocate((std::max)(m_size, _minimum_capacity())); + + ::boost::compute::copy(list.begin(), list.end(), begin(), queue); + } + #endif // BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST + + vector& operator=(const vector &other) + { + if(this != &other){ + command_queue queue = default_queue(); + resize(other.size(), queue); + ::boost::compute::copy(other.begin(), other.end(), begin(), queue); + queue.finish(); + } + + return *this; + } + + template<class OtherAlloc> + vector& operator=(const std::vector<T, OtherAlloc> &vector) + { + command_queue queue = default_queue(); + resize(vector.size(), queue); + ::boost::compute::copy(vector.begin(), vector.end(), begin(), queue); + queue.finish(); + return *this; + } + + #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES + /// Move-constructs a new vector from \p other. 
+ vector(vector&& other) + : m_data(std::move(other.m_data)), + m_size(other.m_size), + m_allocator(std::move(other.m_allocator)) + { + other.m_size = 0; + } + + /// Move-assigns the data from \p other to \c *this. + vector& operator=(vector&& other) + { + if(m_size){ + m_allocator.deallocate(m_data, m_size); + } + + m_data = std::move(other.m_data); + m_size = other.m_size; + m_allocator = std::move(other.m_allocator); + + other.m_size = 0; + + return *this; + } + #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES + + /// Destroys the vector object. + ~vector() + { + if(m_size){ + m_allocator.deallocate(m_data, m_size); + } + } + + iterator begin() + { + return ::boost::compute::make_buffer_iterator<T>(m_data.get_buffer(), 0); + } + + const_iterator begin() const + { + return ::boost::compute::make_buffer_iterator<T>(m_data.get_buffer(), 0); + } + + const_iterator cbegin() const + { + return begin(); + } + + iterator end() + { + return ::boost::compute::make_buffer_iterator<T>(m_data.get_buffer(), m_size); + } + + const_iterator end() const + { + return ::boost::compute::make_buffer_iterator<T>(m_data.get_buffer(), m_size); + } + + const_iterator cend() const + { + return end(); + } + + reverse_iterator rbegin() + { + return reverse_iterator(end() - 1); + } + + const_reverse_iterator rbegin() const + { + return reverse_iterator(end() - 1); + } + + const_reverse_iterator crbegin() const + { + return rbegin(); + } + + reverse_iterator rend() + { + return reverse_iterator(begin() - 1); + } + + const_reverse_iterator rend() const + { + return reverse_iterator(begin() - 1); + } + + const_reverse_iterator crend() const + { + return rend(); + } + + /// Returns the number of elements in the vector. + size_type size() const + { + return m_size; + } + + size_type max_size() const + { + return m_allocator.max_size(); + } + + /// Resizes the vector to \p size. 
+ void resize(size_type size, command_queue &queue) + { + if(size < capacity()){ + m_size = size; + } + else { + // allocate new buffer + pointer new_data = + m_allocator.allocate( + static_cast<size_type>( + static_cast<float>(size) * _growth_factor() + ) + ); + + // copy old values to the new buffer + ::boost::compute::copy(m_data, m_data + m_size, new_data, queue); + + // free old memory + m_allocator.deallocate(m_data, m_size); + + // set new data and size + m_data = new_data; + m_size = size; + } + } + + /// \overload + void resize(size_type size) + { + command_queue queue = default_queue(); + resize(size, queue); + queue.finish(); + } + + /// Returns \c true if the vector is empty. + bool empty() const + { + return m_size == 0; + } + + /// Returns the capacity of the vector. + size_type capacity() const + { + return m_data.get_buffer().size() / sizeof(T); + } + + void reserve(size_type size, command_queue &queue) + { + (void) size; + (void) queue; + } + + void reserve(size_type size) + { + command_queue queue = default_queue(); + reserve(size, queue); + queue.finish(); + } + + void shrink_to_fit(command_queue &queue) + { + (void) queue; + } + + void shrink_to_fit() + { + command_queue queue = default_queue(); + shrink_to_fit(queue); + queue.finish(); + } + + reference operator[](size_type index) + { + return *(begin() + static_cast<difference_type>(index)); + } + + const_reference operator[](size_type index) const + { + return *(begin() + static_cast<difference_type>(index)); + } + + reference at(size_type index) + { + if(index >= size()){ + BOOST_THROW_EXCEPTION(std::out_of_range("index out of range")); + } + + return operator[](index); + } + + const_reference at(size_type index) const + { + if(index >= size()){ + BOOST_THROW_EXCEPTION(std::out_of_range("index out of range")); + } + + return operator[](index); + } + + reference front() + { + return *begin(); + } + + const_reference front() const + { + return *begin(); + } + + reference back() + { + return 
*(end() - static_cast<difference_type>(1)); + } + + const_reference back() const + { + return *(end() - static_cast<difference_type>(1)); + } + + template<class InputIterator> + void assign(InputIterator first, + InputIterator last, + command_queue &queue) + { + // resize vector for new contents + resize(detail::iterator_range_size(first, last), queue); + + // copy values into the vector + ::boost::compute::copy(first, last, begin(), queue); + } + + template<class InputIterator> + void assign(InputIterator first, InputIterator last) + { + command_queue queue = default_queue(); + assign(first, last, queue); + queue.finish(); + } + + void assign(size_type n, const T &value, command_queue &queue) + { + // resize vector for new contents + resize(n, queue); + + // fill vector with value + ::boost::compute::fill_n(begin(), n, value, queue); + } + + void assign(size_type n, const T &value) + { + command_queue queue = default_queue(); + assign(n, value, queue); + queue.finish(); + } + + /// Inserts \p value at the end of the vector (resizing if neccessary). + /// + /// Note that calling \c push_back() to insert data values one at a time + /// is inefficient as there is a non-trivial overhead in performing a data + /// transfer to the device. It is usually better to store a set of values + /// on the host (for example, in a \c std::vector) and then transfer them + /// in bulk using the \c insert() method or the copy() algorithm. 
+ void push_back(const T &value, command_queue &queue) + { + insert(end(), value, queue); + } + + /// \overload + void push_back(const T &value) + { + command_queue queue = default_queue(); + push_back(value, queue); + queue.finish(); + } + + void pop_back(command_queue &queue) + { + resize(size() - 1, queue); + } + + void pop_back() + { + command_queue queue = default_queue(); + pop_back(queue); + queue.finish(); + } + + iterator insert(iterator position, const T &value, command_queue &queue) + { + if(position == end()){ + resize(m_size + 1, queue); + position = begin() + position.get_index(); + ::boost::compute::copy_n(&value, 1, position, queue); + } + else { + ::boost::compute::vector<T, Alloc> tmp(position, end(), queue); + resize(m_size + 1, queue); + position = begin() + position.get_index(); + ::boost::compute::copy_n(&value, 1, position, queue); + ::boost::compute::copy(tmp.begin(), tmp.end(), position + 1, queue); + } + + return position + 1; + } + + iterator insert(iterator position, const T &value) + { + command_queue queue = default_queue(); + iterator iter = insert(position, value, queue); + queue.finish(); + return iter; + } + + void insert(iterator position, + size_type count, + const T &value, + command_queue &queue) + { + ::boost::compute::vector<T, Alloc> tmp(position, end(), queue); + resize(size() + count, queue); + + position = begin() + position.get_index(); + + ::boost::compute::fill_n(position, count, value, queue); + ::boost::compute::copy( + tmp.begin(), + tmp.end(), + position + static_cast<difference_type>(count), + queue + ); + } + + void insert(iterator position, size_type count, const T &value) + { + command_queue queue = default_queue(); + insert(position, count, value, queue); + queue.finish(); + } + + /// Inserts the values in the range [\p first, \p last) into the vector at + /// \p position using \p queue. 
+ template<class InputIterator> + void insert(iterator position, + InputIterator first, + InputIterator last, + command_queue &queue) + { + ::boost::compute::vector<T, Alloc> tmp(position, end(), queue); + + size_type count = detail::iterator_range_size(first, last); + resize(size() + count, queue); + + position = begin() + position.get_index(); + + ::boost::compute::copy(first, last, position, queue); + ::boost::compute::copy( + tmp.begin(), + tmp.end(), + position + static_cast<difference_type>(count), + queue + ); + } + + /// \overload + template<class InputIterator> + void insert(iterator position, InputIterator first, InputIterator last) + { + command_queue queue = default_queue(); + insert(position, first, last, queue); + queue.finish(); + } + + iterator erase(iterator position, command_queue &queue) + { + return erase(position, position + 1, queue); + } + + iterator erase(iterator position) + { + command_queue queue = default_queue(); + iterator iter = erase(position, queue); + queue.finish(); + return iter; + } + + iterator erase(iterator first, iterator last, command_queue &queue) + { + if(last != end()){ + ::boost::compute::vector<T, Alloc> tmp(last, end(), queue); + ::boost::compute::copy(tmp.begin(), tmp.end(), first, queue); + } + + difference_type count = std::distance(first, last); + resize(size() - static_cast<size_type>(count), queue); + + return begin() + first.get_index() + count; + } + + iterator erase(iterator first, iterator last) + { + command_queue queue = default_queue(); + iterator iter = erase(first, last, queue); + queue.finish(); + return iter; + } + + /// Swaps the contents of \c *this with \p other. + void swap(vector &other) + { + std::swap(m_data, other.m_data); + std::swap(m_size, other.m_size); + std::swap(m_allocator, other.m_allocator); + } + + /// Removes all elements from the vector. 
+ void clear() + { + m_size = 0; + } + + allocator_type get_allocator() const + { + return m_allocator; + } + + /// Returns the underlying buffer. + const buffer& get_buffer() const + { + return m_data.get_buffer(); + } + + /// \internal_ + /// + /// Returns a command queue usable to issue commands for the vector's + /// memory buffer. This is used when a member function is called without + /// specifying an existing command queue to use. + command_queue default_queue() const + { + const context &context = m_allocator.get_context(); + command_queue queue(context, context.get_device()); + return queue; + } + +private: + /// \internal_ + BOOST_CONSTEXPR size_type _minimum_capacity() const { return 4; } + + /// \internal_ + BOOST_CONSTEXPR float _growth_factor() const { return 1.5; } + +private: + pointer m_data; + size_type m_size; + allocator_type m_allocator; +}; + +namespace detail { + +// set_kernel_arg specialization for vector<T> +template<class T, class Alloc> +struct set_kernel_arg<vector<T, Alloc> > +{ + void operator()(kernel &kernel_, size_t index, const vector<T, Alloc> &vector) + { + kernel_.set_arg(index, vector.get_buffer()); + } +}; + +// for capturing vector<T> with BOOST_COMPUTE_CLOSURE() +template<class T, class Alloc> +struct capture_traits<vector<T, Alloc> > +{ + static std::string type_name() + { + return std::string("__global ") + ::boost::compute::type_name<T>() + "*"; + } +}; + +// meta_kernel streaming operator for vector<T> +template<class T, class Alloc> +meta_kernel& operator<<(meta_kernel &k, const vector<T, Alloc> &vector) +{ + return k << k.get_buffer_identifier<T>(vector.get_buffer()); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_CONTAINER_VECTOR_HPP diff --git a/boost/compute/context.hpp b/boost/compute/context.hpp new file mode 100644 index 0000000000..5db39e9d83 --- /dev/null +++ b/boost/compute/context.hpp @@ -0,0 +1,245 @@ 
+//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_CONTEXT_HPP +#define BOOST_COMPUTE_CONTEXT_HPP + +#include <vector> + +#include <boost/throw_exception.hpp> + +#include <boost/compute/config.hpp> +#include <boost/compute/device.hpp> +#include <boost/compute/exception/opencl_error.hpp> +#include <boost/compute/detail/assert_cl_success.hpp> + +namespace boost { +namespace compute { + +/// \class context +/// \brief A compute context. +/// +/// The context class represents a compute context. +/// +/// A context object manages a set of OpenCL resources including memory +/// buffers and program objects. Before allocating memory on the device or +/// executing kernels you must set up a context object. +/// +/// To create a context for the default device on the system: +/// \code +/// // get the default compute device +/// boost::compute::device gpu = boost::compute::system::default_device(); +/// +/// // create a context for the device +/// boost::compute::context context(gpu); +/// \endcode +/// +/// Once a context is created, memory can be allocated using the buffer class +/// and kernels can be executed using the command_queue class. +/// +/// \see device, command_queue +class context +{ +public: + /// Create a null context object. + context() + : m_context(0) + { + } + + /// Creates a new context for \p device with \p properties. 
+ /// + /// \see_opencl_ref{clCreateContext} + explicit context(const device &device, + const cl_context_properties *properties = 0) + { + BOOST_ASSERT(device.id() != 0); + + cl_device_id device_id = device.id(); + + cl_int error = 0; + m_context = clCreateContext(properties, 1, &device_id, 0, 0, &error); + + if(!m_context){ + BOOST_THROW_EXCEPTION(opencl_error(error)); + } + } + + /// Creates a new context for \p devices with \p properties. + /// + /// \see_opencl_ref{clCreateContext} + explicit context(const std::vector<device> &devices, + const cl_context_properties *properties = 0) + { + BOOST_ASSERT(!devices.empty()); + + cl_int error = 0; + + m_context = clCreateContext( + properties, + static_cast<cl_uint>(devices.size()), + reinterpret_cast<const cl_device_id *>(&devices[0]), + 0, + 0, + &error + ); + + if(!m_context){ + BOOST_THROW_EXCEPTION(opencl_error(error)); + } + } + + /// Creates a new context object for \p context. If \p retain is + /// \c true, the reference count for \p context will be incremented. + explicit context(cl_context context, bool retain = true) + : m_context(context) + { + if(m_context && retain){ + clRetainContext(m_context); + } + } + + /// Creates a new context object as a copy of \p other. + context(const context &other) + : m_context(other.m_context) + { + if(m_context){ + clRetainContext(m_context); + } + } + + /// Copies the context object from \p other to \c *this. + context& operator=(const context &other) + { + if(this != &other){ + if(m_context){ + clReleaseContext(m_context); + } + + m_context = other.m_context; + + if(m_context){ + clRetainContext(m_context); + } + } + + return *this; + } + + #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES + /// Move-constructs a new context object from \p other. + context(context&& other) BOOST_NOEXCEPT + : m_context(other.m_context) + { + other.m_context = 0; + } + + /// Move-assigns the context from \p other to \c *this. 
+ context& operator=(context&& other) BOOST_NOEXCEPT + { + if(m_context){ + clReleaseContext(m_context); + } + + m_context = other.m_context; + other.m_context = 0; + + return *this; + } + #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES + + /// Destroys the context object. + ~context() + { + if(m_context){ + BOOST_COMPUTE_ASSERT_CL_SUCCESS( + clReleaseContext(m_context) + ); + } + } + + /// Returns the underlying OpenCL context. + cl_context& get() const + { + return const_cast<cl_context &>(m_context); + } + + /// Returns the device for the context. If the context contains multiple + /// devices, the first is returned. + device get_device() const + { + std::vector<device> devices = get_devices(); + + if(devices.empty()) { + return device(); + } + + return devices.front(); + } + + /// Returns a vector of devices for the context. + std::vector<device> get_devices() const + { + return get_info<std::vector<device> >(CL_CONTEXT_DEVICES); + } + + /// Returns information about the context. + /// + /// \see_opencl_ref{clGetContextInfo} + template<class T> + T get_info(cl_context_info info) const + { + return detail::get_object_info<T>(clGetContextInfo, m_context, info); + } + + /// \overload + template<int Enum> + typename detail::get_object_info_type<context, Enum>::type + get_info() const; + + /// Returns \c true if the context is the same at \p other. + bool operator==(const context &other) const + { + return m_context == other.m_context; + } + + /// Returns \c true if the context is different from \p other. 
+ bool operator!=(const context &other) const + { + return m_context != other.m_context; + } + + /// \internal_ + operator cl_context() const + { + return m_context; + } + +private: + cl_context m_context; +}; + +/// \internal_ define get_info() specializations for context +BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(context, + ((cl_uint, CL_CONTEXT_REFERENCE_COUNT)) + ((std::vector<cl_device_id>, CL_CONTEXT_DEVICES)) + ((std::vector<cl_context_properties>, CL_CONTEXT_PROPERTIES)) +) + +#ifdef CL_VERSION_1_1 +BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(context, + ((cl_uint, CL_CONTEXT_NUM_DEVICES)) +) +#endif // CL_VERSION_1_1 + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_CONTEXT_HPP diff --git a/boost/compute/core.hpp b/boost/compute/core.hpp new file mode 100644 index 0000000000..a8e2eb92a6 --- /dev/null +++ b/boost/compute/core.hpp @@ -0,0 +1,32 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_CORE_HPP +#define BOOST_COMPUTE_CORE_HPP + +/// \file +/// +/// Meta-header to include all Boost.Compute core headers. 
+ +#include <boost/compute/buffer.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/config.hpp> +#include <boost/compute/context.hpp> +#include <boost/compute/device.hpp> +#include <boost/compute/event.hpp> +#include <boost/compute/kernel.hpp> +#include <boost/compute/memory_object.hpp> +#include <boost/compute/platform.hpp> +#include <boost/compute/program.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/user_event.hpp> +#include <boost/compute/version.hpp> + +#endif // BOOST_COMPUTE_CORE_HPP diff --git a/boost/compute/detail/assert_cl_success.hpp b/boost/compute/detail/assert_cl_success.hpp new file mode 100644 index 0000000000..78acaf6caf --- /dev/null +++ b/boost/compute/detail/assert_cl_success.hpp @@ -0,0 +1,24 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DETAIL_ASSERT_CL_SUCCESS_HPP +#define BOOST_COMPUTE_DETAIL_ASSERT_CL_SUCCESS_HPP + +#include <boost/assert.hpp> + +#if defined(BOOST_DISABLE_ASSERTS) || defined(NDEBUG) +#define BOOST_COMPUTE_ASSERT_CL_SUCCESS(function) \ + function +#else +#define BOOST_COMPUTE_ASSERT_CL_SUCCESS(function) \ + BOOST_ASSERT(function == CL_SUCCESS) +#endif + +#endif // BOOST_COMPUTE_DETAIL_ASSERT_CL_SUCCESS_HPP diff --git a/boost/compute/detail/buffer_value.hpp b/boost/compute/detail/buffer_value.hpp new file mode 100644 index 0000000000..6a4e78fc19 --- /dev/null +++ b/boost/compute/detail/buffer_value.hpp @@ -0,0 +1,178 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DETAIL_BUFFER_VALUE_HPP +#define BOOST_COMPUTE_DETAIL_BUFFER_VALUE_HPP + +#include <boost/compute/context.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/detail/device_ptr.hpp> +#include <boost/compute/detail/read_write_single_value.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class T> +class buffer_value +{ +public: + typedef T value_type; + + buffer_value() + { + } + + buffer_value(const value_type &value) + : m_value(value) + { + } + + // creates a reference for the value in buffer at index (in bytes). 
+ buffer_value(const buffer &buffer, size_t index) + : m_buffer(buffer.get(), false), + m_index(index) + { + } + + buffer_value(const buffer_value<T> &other) + : m_buffer(other.m_buffer.get(), false), + m_index(other.m_index) + { + } + + ~buffer_value() + { + // set buffer to null so that its reference count will + // not be decremented when its destructor is called + m_buffer.get() = 0; + } + + operator value_type() const + { + if(m_buffer.get()){ + const context &context = m_buffer.get_context(); + const device &device = context.get_device(); + command_queue queue(context, device); + + return detail::read_single_value<T>(m_buffer, m_index / sizeof(T), queue); + } + else { + return m_value; + } + } + + buffer_value<T> operator-() const + { + return -T(*this); + } + + bool operator<(const T &value) const + { + return T(*this) < value; + } + + bool operator>(const T &value) const + { + return T(*this) > value; + } + + bool operator<=(const T &value) const + { + return T(*this) <= value; + } + + bool operator>=(const T &value) const + { + return T(*this) <= value; + } + + bool operator==(const T &value) const + { + return T(*this) == value; + } + + bool operator==(const buffer_value<T> &other) const + { + if(m_buffer.get() != other.m_buffer.get()){ + return false; + } + + if(m_buffer.get()){ + return m_index == other.m_index; + } + else { + return m_value == other.m_value; + } + } + + bool operator!=(const T &value) const + { + return T(*this) != value; + } + + buffer_value<T>& operator=(const T &value) + { + if(m_buffer.get()){ + const context &context = m_buffer.get_context(); + command_queue queue(context, context.get_device()); + + detail::write_single_value<T>(value, m_buffer, m_index / sizeof(T), queue); + + return *this; + } + else { + m_value = value; + return *this; + } + } + + buffer_value<T>& operator=(const buffer_value<T> &value) + { + return operator=(T(value)); + } + + detail::device_ptr<T> operator&() const + { + return detail::device_ptr<T>(m_buffer, 
m_index); + } + + buffer_value<T>& operator++() + { + if(m_buffer.get()){ + T value = T(*this); + value++; + *this = value; + } + else { + m_value++; + } + + return *this; + } + + buffer_value<T> operator++(int) + { + buffer_value<T> result(*this); + ++(*this); + return result; + } + +private: + const buffer m_buffer; + size_t m_index; + value_type m_value; +}; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_DETAIL_BUFFER_VALUE_HPP diff --git a/boost/compute/detail/device_ptr.hpp b/boost/compute/detail/device_ptr.hpp new file mode 100644 index 0000000000..29ecd13631 --- /dev/null +++ b/boost/compute/detail/device_ptr.hpp @@ -0,0 +1,215 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DEVICE_PTR_HPP +#define BOOST_COMPUTE_DEVICE_PTR_HPP + +#include <boost/type_traits.hpp> +#include <boost/static_assert.hpp> + +#include <boost/compute/buffer.hpp> +#include <boost/compute/config.hpp> +#include <boost/compute/detail/is_buffer_iterator.hpp> +#include <boost/compute/detail/read_write_single_value.hpp> +#include <boost/compute/type_traits/is_device_iterator.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class T, class IndexExpr> +struct device_ptr_index_expr +{ + typedef T result_type; + + device_ptr_index_expr(const buffer &buffer, + uint_ index, + const IndexExpr &expr) + : m_buffer(buffer), + m_index(index), + m_expr(expr) + { + } + + operator T() const + { + BOOST_STATIC_ASSERT_MSG(boost::is_integral<IndexExpr>::value, + "Index expression must be integral"); + + BOOST_ASSERT(m_buffer.get()); + + const context &context = m_buffer.get_context(); + const device &device = context.get_device(); + command_queue queue(context, device); + + return detail::read_single_value<T>(m_buffer, m_expr, queue); + } + + const buffer &m_buffer; + uint_ m_index; + IndexExpr m_expr; +}; + +template<class T> +class device_ptr +{ +public: + typedef T value_type; + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + typedef std::random_access_iterator_tag iterator_category; + typedef T* pointer; + typedef T& reference; + + device_ptr() + : m_index(0) + { + } + + device_ptr(const buffer &buffer, size_t index = 0) + : m_buffer(buffer.get(), false), + m_index(index) + { + } + + device_ptr(const device_ptr<T> &other) + : m_buffer(other.m_buffer.get(), false), + m_index(other.m_index) + { + } + + device_ptr<T>& operator=(const device_ptr<T> &other) + { + if(this != &other){ + m_buffer.get() = other.m_buffer.get(); + m_index = other.m_index; + } + + return *this; + } + + #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES + 
device_ptr(device_ptr<T>&& other) BOOST_NOEXCEPT + : m_buffer(other.m_buffer.get(), false), + m_index(other.m_index) + { + other.m_buffer.get() = 0; + } + + device_ptr<T>& operator=(device_ptr<T>&& other) BOOST_NOEXCEPT + { + m_buffer.get() = other.m_buffer.get(); + m_index = other.m_index; + + other.m_buffer.get() = 0; + + return *this; + } + #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES + + ~device_ptr() + { + // set buffer to null so that its reference count will + // not be decremented when its destructor is called + m_buffer.get() = 0; + } + + size_type get_index() const + { + return m_index; + } + + const buffer& get_buffer() const + { + return m_buffer; + } + + template<class OT> + device_ptr<OT> cast() const + { + return device_ptr<OT>(m_buffer, m_index); + } + + device_ptr<T> operator+(difference_type n) const + { + return device_ptr<T>(m_buffer, m_index + n); + } + + device_ptr<T> operator+(const device_ptr<T> &other) const + { + return device_ptr<T>(m_buffer, m_index + other.m_index); + } + + device_ptr<T>& operator+=(difference_type n) + { + m_index += static_cast<size_t>(n); + return *this; + } + + difference_type operator-(const device_ptr<T> &other) const + { + return static_cast<difference_type>(m_index - other.m_index); + } + + device_ptr<T>& operator-=(difference_type n) + { + m_index -= n; + return *this; + } + + bool operator==(const device_ptr<T> &other) const + { + return m_buffer.get() == other.m_buffer.get() && + m_index == other.m_index; + } + + bool operator!=(const device_ptr<T> &other) const + { + return !(*this == other); + } + + template<class Expr> + detail::device_ptr_index_expr<T, Expr> + operator[](const Expr &expr) const + { + BOOST_ASSERT(m_buffer.get()); + + return detail::device_ptr_index_expr<T, Expr>(m_buffer, + uint_(m_index), + expr); + } + +private: + const buffer m_buffer; + size_t m_index; +}; + +// is_buffer_iterator specialization for device_ptr +template<class Iterator> +struct is_buffer_iterator< + Iterator, + 
typename boost::enable_if< + boost::is_same< + device_ptr<typename Iterator::value_type>, + typename boost::remove_const<Iterator>::type + > + >::type +> : public boost::true_type {}; + +} // end detail namespace + +// is_device_iterator specialization for device_ptr +template<class T> +struct is_device_iterator<detail::device_ptr<T> > : boost::true_type {}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_DEVICE_PTR_HPP diff --git a/boost/compute/detail/diagnostic.hpp b/boost/compute/detail/diagnostic.hpp new file mode 100644 index 0000000000..76a69f6570 --- /dev/null +++ b/boost/compute/detail/diagnostic.hpp @@ -0,0 +1,112 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2016 Jakub Szuppe <j.szuppe@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DETAIL_DIAGNOSTIC_HPP +#define BOOST_COMPUTE_DETAIL_DIAGNOSTIC_HPP + +// Macros for suppressing warnings for GCC version 4.6 or later. 
Usage: +// +// BOOST_COMPUTE_BOOST_COMPUTE_GCC_DIAG_OFF(sign-compare); +// if(a < b){ +// BOOST_COMPUTE_BOOST_COMPUTE_GCC_DIAG_ON(sign-compare); +// +// Source: https://svn.boost.org/trac/boost/wiki/Guidelines/WarningsGuidelines +#if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 402 +#define BOOST_COMPUTE_GCC_DIAG_STR(s) #s +#define BOOST_COMPUTE_GCC_DIAG_JOINSTR(x,y) BOOST_COMPUTE_GCC_DIAG_STR(x ## y) +# define BOOST_COMPUTE_GCC_DIAG_DO_PRAGMA(x) _Pragma (#x) +# define BOOST_COMPUTE_GCC_DIAG_PRAGMA(x) BOOST_COMPUTE_GCC_DIAG_DO_PRAGMA(GCC diagnostic x) +# if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406 +# define BOOST_COMPUTE_GCC_DIAG_OFF(x) BOOST_COMPUTE_GCC_DIAG_PRAGMA(push) \ + BOOST_COMPUTE_GCC_DIAG_PRAGMA(ignored BOOST_COMPUTE_GCC_DIAG_JOINSTR(-W,x)) +# define BOOST_COMPUTE_GCC_DIAG_ON(x) BOOST_COMPUTE_GCC_DIAG_PRAGMA(pop) +# else +# define BOOST_COMPUTE_GCC_DIAG_OFF(x) \ + BOOST_COMPUTE_GCC_DIAG_PRAGMA(ignored BOOST_COMPUTE_GCC_DIAG_JOINSTR(-W,x)) +# define BOOST_COMPUTE_GCC_DIAG_ON(x) \ + BOOST_COMPUTE_GCC_DIAG_PRAGMA(warning BOOST_COMPUTE_GCC_DIAG_JOINSTR(-W,x)) +# endif +#else // Ensure these macros do nothing for other compilers. +# define BOOST_COMPUTE_GCC_DIAG_OFF(x) +# define BOOST_COMPUTE_GCC_DIAG_ON(x) +#endif + +// Macros for suppressing warnings for Clang. 
+// +// BOOST_COMPUTE_BOOST_COMPUTE_CLANG_DIAG_OFF(sign-compare); +// if(a < b){ +// BOOST_COMPUTE_BOOST_COMPUTE_CLANG_DIAG_ON(sign-compare); +// +// Source: https://svn.boost.org/trac/boost/wiki/Guidelines/WarningsGuidelines +#ifdef __clang__ +# define BOOST_COMPUTE_CLANG_DIAG_STR(s) # s +// stringize s to "no-sign-compare" +# define BOOST_COMPUTE_CLANG_DIAG_JOINSTR(x,y) BOOST_COMPUTE_CLANG_DIAG_STR(x ## y) +// join -W with no-unused-variable to "-Wno-sign-compare" +# define BOOST_COMPUTE_CLANG_DIAG_DO_PRAGMA(x) _Pragma (#x) +// _Pragma is unary operator #pragma ("") +# define BOOST_COMPUTE_CLANG_DIAG_PRAGMA(x) \ + BOOST_COMPUTE_CLANG_DIAG_DO_PRAGMA(clang diagnostic x) +# define BOOST_COMPUTE_CLANG_DIAG_OFF(x) BOOST_COMPUTE_CLANG_DIAG_PRAGMA(push) \ + BOOST_COMPUTE_CLANG_DIAG_PRAGMA(ignored BOOST_COMPUTE_CLANG_DIAG_JOINSTR(-W,x)) +// For example: #pragma clang diagnostic ignored "-Wno-sign-compare" +# define BOOST_COMPUTE_CLANG_DIAG_ON(x) BOOST_COMPUTE_CLANG_DIAG_PRAGMA(pop) +// For example: #pragma clang diagnostic warning "-Wno-sign-compare" +#else // Ensure these macros do nothing for other compilers. +# define BOOST_COMPUTE_CLANG_DIAG_OFF(x) +# define BOOST_COMPUTE_CLANG_DIAG_ON(x) +# define BOOST_COMPUTE_CLANG_DIAG_PRAGMA(x) +#endif + +// Macros for suppressing warnings for MSVC. Usage: +// +// BOOST_COMPUTE_BOOST_COMPUTE_MSVC_DIAG_OFF(4018); //sign-compare +// if(a < b){ +// BOOST_COMPUTE_BOOST_COMPUTE_MSVC_DIAG_ON(4018); +// +#if defined(_MSC_VER) +# define BOOST_COMPUTE_MSVC_DIAG_DO_PRAGMA(x) __pragma(x) +# define BOOST_COMPUTE_MSVC_DIAG_PRAGMA(x) \ + BOOST_COMPUTE_MSVC_DIAG_DO_PRAGMA(warning(x)) +# define BOOST_COMPUTE_MSVC_DIAG_OFF(x) BOOST_COMPUTE_MSVC_DIAG_PRAGMA(push) \ + BOOST_COMPUTE_MSVC_DIAG_PRAGMA(disable: x) +# define BOOST_COMPUTE_MSVC_DIAG_ON(x) BOOST_COMPUTE_MSVC_DIAG_PRAGMA(pop) +#else // Ensure these macros do nothing for other compilers. 
+# define BOOST_COMPUTE_MSVC_DIAG_OFF(x) +# define BOOST_COMPUTE_MSVC_DIAG_ON(x) +#endif + +// Macros for suppressing warnings for GCC, Clang and MSVC. Usage: +// +// BOOST_COMPUTE_DIAG_OFF(sign-compare, sign-compare, 4018); +// if(a < b){ +// BOOST_COMPUTE_DIAG_ON(sign-compare, sign-compare, 4018); +// +#if defined(_MSC_VER) // MSVC +# define BOOST_COMPUTE_DIAG_OFF(gcc, clang, msvc) BOOST_COMPUTE_MSVC_DIAG_OFF(msvc) +# define BOOST_COMPUTE_DIAG_ON(gcc, clang, msvc) BOOST_COMPUTE_MSVC_DIAG_ON(msvc) +#elif defined(__clang__) // Clang +# define BOOST_COMPUTE_DIAG_OFF(gcc, clang, msvc) BOOST_COMPUTE_CLANG_DIAG_OFF(clang) +# define BOOST_COMPUTE_DIAG_ON(gcc, clang, msvc) BOOST_COMPUTE_CLANG_DIAG_ON(clang) +#elif defined(__GNUC__) // GCC/G++ +# define BOOST_COMPUTE_DIAG_OFF(gcc, clang, msvc) BOOST_COMPUTE_GCC_DIAG_OFF(gcc) +# define BOOST_COMPUTE_DIAG_ON(gcc, clang, msvc) BOOST_COMPUTE_GCC_DIAG_ON(gcc) +#else // Ensure these macros do nothing for other compilers. +# define BOOST_COMPUTE_DIAG_OFF(gcc, clang, msvc) +# define BOOST_COMPUTE_DIAG_ON(gcc, clang, msvc) +#endif + +#define BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS() \ + BOOST_COMPUTE_DIAG_OFF(deprecated-declarations, deprecated-declarations, 4996) +#define BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS() \ + BOOST_COMPUTE_DIAG_ON(deprecated-declarations, deprecated-declarations, 4996); + + +#endif /* BOOST_COMPUTE_DETAIL_DIAGNOSTIC_HPP */ diff --git a/boost/compute/detail/duration.hpp b/boost/compute/detail/duration.hpp new file mode 100644 index 0000000000..601f12d291 --- /dev/null +++ b/boost/compute/detail/duration.hpp @@ -0,0 +1,50 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DETAIL_DURATION_HPP +#define BOOST_COMPUTE_DETAIL_DURATION_HPP + +#include <boost/config.hpp> + +#ifndef BOOST_COMPUTE_NO_HDR_CHRONO +#include <chrono> +#endif + +#include <boost/chrono/duration.hpp> + +namespace boost { +namespace compute { +namespace detail { + +#ifndef BOOST_COMPUTE_NO_HDR_CHRONO +template<class Rep, class Period> +inline std::chrono::duration<Rep, Period> +make_duration_from_nanoseconds(std::chrono::duration<Rep, Period>, size_t nanoseconds) +{ + return std::chrono::duration_cast<std::chrono::duration<Rep, Period> >( + std::chrono::nanoseconds(nanoseconds) + ); +} +#endif // BOOST_COMPUTE_NO_HDR_CHRONO + +template<class Rep, class Period> +inline boost::chrono::duration<Rep, Period> +make_duration_from_nanoseconds(boost::chrono::duration<Rep, Period>, size_t nanoseconds) +{ + return boost::chrono::duration_cast<boost::chrono::duration<Rep, Period> >( + boost::chrono::nanoseconds(nanoseconds) + ); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_DETAIL_DURATION_HPP diff --git a/boost/compute/detail/get_object_info.hpp b/boost/compute/detail/get_object_info.hpp new file mode 100644 index 0000000000..cdc20cbc13 --- /dev/null +++ b/boost/compute/detail/get_object_info.hpp @@ -0,0 +1,216 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DETAIL_GET_OBJECT_INFO_HPP +#define BOOST_COMPUTE_DETAIL_GET_OBJECT_INFO_HPP + +#include <string> +#include <vector> + +#include <boost/preprocessor/seq/for_each.hpp> +#include <boost/preprocessor/tuple/elem.hpp> + +#include <boost/throw_exception.hpp> + +#include <boost/compute/cl.hpp> +#include <boost/compute/exception/opencl_error.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class Function, class Object, class AuxInfo> +struct bound_info_function +{ + bound_info_function(Function function, Object object, AuxInfo aux_info) + : m_function(function), + m_object(object), + m_aux_info(aux_info) + { + } + + template<class Info> + cl_int operator()(Info info, size_t size, void *value, size_t *size_ret) const + { + return m_function(m_object, m_aux_info, info, size, value, size_ret); + } + + Function m_function; + Object m_object; + AuxInfo m_aux_info; +}; + +template<class Function, class Object> +struct bound_info_function<Function, Object, void> +{ + bound_info_function(Function function, Object object) + : m_function(function), + m_object(object) + { + } + + template<class Info> + cl_int operator()(Info info, size_t size, void *value, size_t *size_ret) const + { + return m_function(m_object, info, size, value, size_ret); + } + + Function m_function; + Object m_object; +}; + +template<class Function, class Object> +inline bound_info_function<Function, Object, void> +bind_info_function(Function f, Object o) +{ + return bound_info_function<Function, Object, void>(f, o); +} + +template<class Function, class Object, class AuxInfo> +inline bound_info_function<Function, Object, AuxInfo> +bind_info_function(Function f, Object o, AuxInfo j) +{ + return bound_info_function<Function, Object, AuxInfo>(f, o, j); +} + +// default implementation +template<class T> +struct get_object_info_impl +{ + template<class Function, class Info> + T 
operator()(Function function, Info info) const + { + T value; + + cl_int ret = function(info, sizeof(T), &value, 0); + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return value; + } +}; + +// specialization for bool +template<> +struct get_object_info_impl<bool> +{ + template<class Function, class Info> + bool operator()(Function function, Info info) const + { + cl_bool value; + + cl_int ret = function(info, sizeof(cl_bool), &value, 0); + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return value == CL_TRUE; + } +}; + +// specialization for std::string +template<> +struct get_object_info_impl<std::string> +{ + template<class Function, class Info> + std::string operator()(Function function, Info info) const + { + size_t size = 0; + + cl_int ret = function(info, 0, 0, &size); + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + if(size == 0){ + return std::string(); + } + + std::string value(size - 1, 0); + + ret = function(info, size, &value[0], 0); + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return value; + } +}; + +// specialization for std::vector<T> +template<class T> +struct get_object_info_impl<std::vector<T> > +{ + template<class Function, class Info> + std::vector<T> operator()(Function function, Info info) const + { + size_t size = 0; + + cl_int ret = function(info, 0, 0, &size); + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + std::vector<T> vector(size / sizeof(T)); + ret = function(info, size, &vector[0], 0); + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return vector; + } +}; + +// returns the value (of type T) from the given clGet*Info() function call. 
+template<class T, class Function, class Object, class Info> +inline T get_object_info(Function f, Object o, Info i) +{ + return get_object_info_impl<T>()(bind_info_function(f, o), i); +} + +template<class T, class Function, class Object, class Info, class AuxInfo> +inline T get_object_info(Function f, Object o, Info i, AuxInfo j) +{ + return get_object_info_impl<T>()(bind_info_function(f, o, j), i); +} + +// returns the value type for the clGet*Info() call on Object with Enum. +template<class Object, int Enum> +struct get_object_info_type; + +// defines the object::get_info<Enum>() specialization +#define BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATION(object_type, result_type, value) \ + namespace detail { \ + template<> struct get_object_info_type<object_type, value> { typedef result_type type; }; \ + } \ + template<> inline result_type object_type::get_info<value>() const \ + { \ + return get_info<result_type>(value); \ + } + +// used by BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS() +#define BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_IMPL(r, data, elem) \ + BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATION( \ + data, BOOST_PP_TUPLE_ELEM(2, 0, elem), BOOST_PP_TUPLE_ELEM(2, 1, elem) \ + ) + +// defines the object::get_info<Enum>() specialization for each +// (result_type, value) tuple in seq for object_type. 
+#define BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(object_type, seq) \ + BOOST_PP_SEQ_FOR_EACH( \ + BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_IMPL, object_type, seq \ + ) + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_DETAIL_GET_OBJECT_INFO_HPP diff --git a/boost/compute/detail/getenv.hpp b/boost/compute/detail/getenv.hpp new file mode 100644 index 0000000000..ceb3605d5a --- /dev/null +++ b/boost/compute/detail/getenv.hpp @@ -0,0 +1,36 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DETAIL_GETENV_HPP +#define BOOST_COMPUTE_DETAIL_GETENV_HPP + +#include <cstdlib> + +namespace boost { +namespace compute { +namespace detail { + +inline const char* getenv(const char *env_var) +{ +#ifdef _MSC_VER +# pragma warning(push) +# pragma warning(disable: 4996) +#endif + return std::getenv(env_var); +#ifdef _MSC_VER +# pragma warning(pop) +#endif +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_DETAIL_GETENV_HPP diff --git a/boost/compute/detail/global_static.hpp b/boost/compute/detail/global_static.hpp new file mode 100644 index 0000000000..d8014e4252 --- /dev/null +++ b/boost/compute/detail/global_static.hpp @@ -0,0 +1,37 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt 
+// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DETAIL_GLOBAL_STATIC_HPP +#define BOOST_COMPUTE_DETAIL_GLOBAL_STATIC_HPP + +#include <boost/compute/config.hpp> + +#ifdef BOOST_COMPUTE_THREAD_SAFE +# ifdef BOOST_COMPUTE_HAVE_THREAD_LOCAL + // use c++11 thread local storage +# define BOOST_COMPUTE_DETAIL_GLOBAL_STATIC(type, name, ctor) \ + thread_local type name ctor; +# else + // use thread_specific_ptr from boost.thread +# include <boost/thread/tss.hpp> +# define BOOST_COMPUTE_DETAIL_GLOBAL_STATIC(type, name, ctor) \ + static ::boost::thread_specific_ptr< type > BOOST_PP_CAT(name, _tls_ptr_); \ + if(!BOOST_PP_CAT(name, _tls_ptr_).get()){ \ + BOOST_PP_CAT(name, _tls_ptr_).reset(new type ctor); \ + } \ + static type &name = *BOOST_PP_CAT(name, _tls_ptr_); +# endif +#else + // no thread-safety, just use static +# define BOOST_COMPUTE_DETAIL_GLOBAL_STATIC(type, name, ctor) \ + static type name ctor; +#endif + +#endif // BOOST_COMPUTE_DETAIL_GLOBAL_STATIC_HPP diff --git a/boost/compute/detail/is_buffer_iterator.hpp b/boost/compute/detail/is_buffer_iterator.hpp new file mode 100644 index 0000000000..c0caa050d6 --- /dev/null +++ b/boost/compute/detail/is_buffer_iterator.hpp @@ -0,0 +1,30 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DETAIL_IS_BUFFER_ITERATOR_HPP +#define BOOST_COMPUTE_DETAIL_IS_BUFFER_ITERATOR_HPP + +#include <boost/config.hpp> +#include <boost/type_traits.hpp> +#include <boost/utility/enable_if.hpp> + +namespace boost { +namespace compute { +namespace detail { + +// default = false +template<class Iterator, class Enable = void> +struct is_buffer_iterator : public boost::false_type {}; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_DETAIL_IS_BUFFER_ITERATOR_HPP diff --git a/boost/compute/detail/is_contiguous_iterator.hpp b/boost/compute/detail/is_contiguous_iterator.hpp new file mode 100644 index 0000000000..d0889b2f9e --- /dev/null +++ b/boost/compute/detail/is_contiguous_iterator.hpp @@ -0,0 +1,118 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DETAIL_IS_CONTIGUOUS_ITERATOR_HPP +#define BOOST_COMPUTE_DETAIL_IS_CONTIGUOUS_ITERATOR_HPP + +#include <vector> +#include <valarray> + +#include <boost/config.hpp> +#include <boost/type_traits.hpp> +#include <boost/utility/enable_if.hpp> + +namespace boost { +namespace compute { +namespace detail { + +// default = false +template<class Iterator, class Enable = void> +struct _is_contiguous_iterator : public boost::false_type {}; + +// std::vector<T>::iterator = true +template<class Iterator> +struct _is_contiguous_iterator< + Iterator, + typename boost::enable_if< + typename boost::is_same< + Iterator, + typename std::vector<typename Iterator::value_type>::iterator + >::type + >::type +> : public boost::true_type {}; + +// std::vector<T>::const_iterator = true +template<class Iterator> +struct _is_contiguous_iterator< + Iterator, + typename boost::enable_if< + typename boost::is_same< + Iterator, + typename std::vector<typename Iterator::value_type>::const_iterator + >::type + >::type +> : public boost::true_type {}; + +// std::valarray<T>::iterator = true +template<class Iterator> +struct _is_contiguous_iterator< + Iterator, + typename boost::enable_if< + typename boost::is_same< + Iterator, + typename std::valarray<typename Iterator::value_type>::iterator + >::type + >::type +> : public boost::true_type {}; + +// std::valarray<T>::const_iterator = true +template<class Iterator> +struct _is_contiguous_iterator< + Iterator, + typename boost::enable_if< + typename boost::is_same< + Iterator, + typename std::valarray<typename Iterator::value_type>::const_iterator + >::type + >::type +> : public boost::true_type {}; + +// T* = true +template<class Iterator> +struct _is_contiguous_iterator< + Iterator, + typename boost::enable_if< + boost::is_pointer<Iterator> + >::type +> : public boost::true_type {}; + +// the is_contiguous_iterator meta-function returns true if 
Iterator points +// to a range of contiguous values. examples of contiguous iterators are +// std::vector<>::iterator and float*. examples of non-contiguous iterators +// are std::set<>::iterator and std::insert_iterator<>. +// +// the implementation consists of two phases. the first checks that value_type +// for the iterator is not void. this must be done as for many containers void +// is not a valid value_type (ex. std::vector<void>::iterator is not valid). +// after ensuring a non-void value_type, the _is_contiguous_iterator function +// is invoked. it has specializations retuning true for all (known) contiguous +// iterators types and a default value of false. +template<class Iterator, class Enable = void> +struct is_contiguous_iterator : + public _is_contiguous_iterator< + typename boost::remove_cv<Iterator>::type + > {}; + +// value_type of void = false +template<class Iterator> +struct is_contiguous_iterator< + Iterator, + typename boost::enable_if< + typename boost::is_void< + typename Iterator::value_type + >::type + >::type +> : public boost::false_type {}; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_DETAIL_IS_CONTIGUOUS_ITERATOR_HPP diff --git a/boost/compute/detail/iterator_plus_distance.hpp b/boost/compute/detail/iterator_plus_distance.hpp new file mode 100644 index 0000000000..26e95f16c0 --- /dev/null +++ b/boost/compute/detail/iterator_plus_distance.hpp @@ -0,0 +1,53 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
namespace boost {
namespace compute {
namespace detail {

// Generic implementation, selected for every iterator category that has no
// more specific overload: steps the iterator forward one element at a time.
//
// Only forward movement is possible here, so a zero or negative `n` leaves
// the iterator unchanged. (Previously a negative signed `n` made the
// `while(n--)` loop run until the counter overflowed.)
template<class Iterator, class Distance, class Tag>
inline Iterator iterator_plus_distance(Iterator i, Distance n, Tag)
{
    for(; n > 0; --n){
        ++i; // pre-increment: no temporary iterator copy per step
    }

    return i;
}

// Random-access implementation: a single `i + n`. The distance is first
// converted to the iterator's own difference_type so that callers may pass
// any integral type (e.g. size_t); negative distances move backwards.
template<class Iterator, class Distance>
inline Iterator iterator_plus_distance(Iterator i,
                                       Distance n,
                                       std::random_access_iterator_tag)
{
    typedef typename
        std::iterator_traits<Iterator>::difference_type difference_type;

    return i + static_cast<difference_type>(n);
}

// similar to std::advance() except returns the advanced iterator and
// also works with iterators that don't define difference_type
//
// dispatches on the iterator_category of Iterator to one of the two
// implementations above.
template<class Iterator, class Distance>
inline Iterator iterator_plus_distance(Iterator i, Distance n)
{
    typedef typename std::iterator_traits<Iterator>::iterator_category tag;

    return iterator_plus_distance(i, n, tag());
}

} // end detail namespace
} // end compute namespace
} // end boost namespace
namespace boost {
namespace compute {
namespace detail {

// This is a convenience function which returns the size of a range
// bounded by two iterators. This function has two differences from
// the std::distance() function: 1) the return type (std::size_t) is
// unsigned, and 2) the return value is never negative: if
// std::distance(first, last) yields a negative value (e.g. random-access
// iterators passed in reverse order) the result is clamped to zero.
template<class Iterator>
inline std::size_t iterator_range_size(Iterator first, Iterator last)
{
    typedef typename
        std::iterator_traits<Iterator>::difference_type
        difference_type;

    const difference_type difference = std::distance(first, last);

    // clamp before converting so a negative distance cannot wrap around
    // to a huge unsigned value
    if(difference <= static_cast<difference_type>(0)){
        return static_cast<std::size_t>(0);
    }

    return static_cast<std::size_t>(difference);
}

} // end detail namespace
} // end compute namespace
} // end boost namespace
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DETAIL_ITERATOR_TRAITS_HPP +#define BOOST_COMPUTE_DETAIL_ITERATOR_TRAITS_HPP + +#include <iterator> + +#include <boost/compute/detail/is_contiguous_iterator.hpp> +#include <boost/compute/type_traits/is_device_iterator.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class Iterator> +struct iterator_traits : public std::iterator_traits<Iterator> +{ + static const bool is_contiguous = is_contiguous_iterator<Iterator>::value; + static const bool is_on_device = is_device_iterator<Iterator>::value; + static const bool is_on_host = !is_on_device; +}; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ITERATOR_TRAITS_HPP diff --git a/boost/compute/detail/literal.hpp b/boost/compute/detail/literal.hpp new file mode 100644 index 0000000000..0d23b1d4d2 --- /dev/null +++ b/boost/compute/detail/literal.hpp @@ -0,0 +1,45 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
namespace boost {
namespace compute {
namespace detail {

// Returns the suffix appended to a literal of the argument's type: "f" for
// float, nothing for every other type. Overload resolution prefers the
// non-template float overload only for an exact float argument.
// NOTE(review): this assumes the float_ typedef from
// boost/compute/types/fundamental.hpp is an alias of float -- confirm.
inline const char* literal_suffix(float)
{
    return "f";
}

template<class T>
inline const char* literal_suffix(const T&)
{
    return "";
}

// Returns the stream precision (digits after the decimal point in
// std::scientific notation) used by make_literal() for type T.
//
// For binary floating-point types this is max_digits10 - 1, computed with
// the usual C++03-compatible approximation of log10(2), so that the
// printed text converts back to exactly the same value (9 significant
// digits for float, 17 for double). For all other types the previous
// value, digits10, is kept.
template<class T>
inline int literal_precision()
{
    typedef std::numeric_limits<T> limits;

    if(limits::is_specialized && !limits::is_integer && limits::radix == 2){
        return 1 + limits::digits * 30103 / 100000;
    }

    return limits::digits10;
}

// Formats the value x as source text: it is streamed in scientific
// notation with enough digits to round-trip, followed by an "f" suffix
// when T is float.
template<class T>
std::string make_literal(T x)
{
    std::stringstream s;
    s << std::setprecision(literal_precision<T>())
      << std::scientific
      << x
      << literal_suffix(x);

    return s.str();
}

} // end detail namespace
} // end compute namespace
} // end boost namespace
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DETAIL_LRU_CACHE_HPP +#define BOOST_COMPUTE_DETAIL_LRU_CACHE_HPP + +#include <map> +#include <list> +#include <utility> + +#include <boost/optional.hpp> + +namespace boost { +namespace compute { +namespace detail { + +// a cache which evicts the least recently used item when it is full +template<class Key, class Value> +class lru_cache +{ +public: + typedef Key key_type; + typedef Value value_type; + typedef std::list<key_type> list_type; + typedef std::map< + key_type, + std::pair<value_type, typename list_type::iterator> + > map_type; + + lru_cache(size_t capacity) + : m_capacity(capacity) + { + } + + ~lru_cache() + { + } + + size_t size() const + { + return m_map.size(); + } + + size_t capacity() const + { + return m_capacity; + } + + bool empty() const + { + return m_map.empty(); + } + + bool contains(const key_type &key) + { + return m_map.find(key) != m_map.end(); + } + + void insert(const key_type &key, const value_type &value) + { + typename map_type::iterator i = m_map.find(key); + if(i == m_map.end()){ + // insert item into the cache, but first check if it is full + if(size() >= m_capacity){ + // cache is full, evict the least recently used item + evict(); + } + + // insert the new item + m_list.push_front(key); + m_map[key] = std::make_pair(value, m_list.begin()); + } + } + + boost::optional<value_type> get(const key_type &key) + { + // lookup value in the cache + typename map_type::iterator i = m_map.find(key); + if(i == m_map.end()){ + // value not in cache + return boost::none; + } + + // return the value, but first update its place in the most + // recently used list + typename list_type::iterator j = i->second.second; + if(j != m_list.begin()){ + // move item to the front of the most recently used list + m_list.erase(j); + m_list.push_front(key); + + // update iterator in map + j = m_list.begin(); + const value_type &value = i->second.first; + 
m_map[key] = std::make_pair(value, j); + + // return the value + return value; + } + else { + // the item is already at the front of the most recently + // used list so just return it + return i->second.first; + } + } + + void clear() + { + m_map.clear(); + m_list.clear(); + } + +private: + void evict() + { + // evict item from the end of most recently used list + typename list_type::iterator i = --m_list.end(); + m_map.erase(*i); + m_list.erase(i); + } + +private: + map_type m_map; + list_type m_list; + size_t m_capacity; +}; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_DETAIL_LRU_CACHE_HPP diff --git a/boost/compute/detail/meta_kernel.hpp b/boost/compute/detail/meta_kernel.hpp new file mode 100644 index 0000000000..7be778b025 --- /dev/null +++ b/boost/compute/detail/meta_kernel.hpp @@ -0,0 +1,1054 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DETAIL_META_KERNEL_HPP +#define BOOST_COMPUTE_DETAIL_META_KERNEL_HPP + +#include <set> +#include <string> +#include <vector> +#include <iomanip> +#include <sstream> +#include <utility> + +#include <boost/tuple/tuple.hpp> +#include <boost/type_traits.hpp> +#include <boost/lexical_cast.hpp> +#include <boost/static_assert.hpp> +#include <boost/algorithm/string/find.hpp> +#include <boost/preprocessor/repetition.hpp> + +#include <boost/compute/kernel.hpp> +#include <boost/compute/closure.hpp> +#include <boost/compute/function.hpp> +#include <boost/compute/functional.hpp> +#include <boost/compute/type_traits.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/image/image2d.hpp> +#include <boost/compute/image/image_sampler.hpp> +#include <boost/compute/memory_object.hpp> +#include <boost/compute/detail/device_ptr.hpp> +#include <boost/compute/detail/sha1.hpp> +#include <boost/compute/utility/program_cache.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class T> +class meta_kernel_variable +{ +public: + typedef T result_type; + + meta_kernel_variable(const std::string &name) + : m_name(name) + { + } + + meta_kernel_variable(const meta_kernel_variable &other) + : m_name(other.m_name) + { + } + + meta_kernel_variable& operator=(const meta_kernel_variable &other) + { + if(this != &other){ + m_name = other.m_name; + } + + return *this; + } + + ~meta_kernel_variable() + { + } + + std::string name() const + { + return m_name; + } + +private: + std::string m_name; +}; + +template<class T> +class meta_kernel_literal +{ +public: + typedef T result_type; + + meta_kernel_literal(const T &value) + : m_value(value) + { + } + + meta_kernel_literal(const meta_kernel_literal &other) + : m_value(other.m_value) + { + } + + meta_kernel_literal& operator=(const meta_kernel_literal &other) + { + if(this != &other){ + m_value = 
other.m_value; + } + + return *this; + } + + ~meta_kernel_literal() + { + } + + const T& value() const + { + return m_value; + } + +private: + T m_value; +}; + +struct meta_kernel_stored_arg +{ + meta_kernel_stored_arg() + : m_size(0), + m_value(0) + { + } + + meta_kernel_stored_arg(const meta_kernel_stored_arg &other) + : m_size(0), + m_value(0) + { + set_value(other.m_size, other.m_value); + } + + meta_kernel_stored_arg& operator=(const meta_kernel_stored_arg &other) + { + if(this != &other){ + set_value(other.m_size, other.m_value); + } + + return *this; + } + + template<class T> + meta_kernel_stored_arg(const T &value) + : m_size(0), + m_value(0) + { + set_value(value); + } + + ~meta_kernel_stored_arg() + { + if(m_value){ + std::free(m_value); + } + } + + void set_value(size_t size, const void *value) + { + if(m_value){ + std::free(m_value); + } + + m_size = size; + + if(value){ + m_value = std::malloc(size); + std::memcpy(m_value, value, size); + } + else { + m_value = 0; + } + } + + template<class T> + void set_value(const T &value) + { + set_value(sizeof(T), boost::addressof(value)); + } + + size_t m_size; + void *m_value; +}; + +struct meta_kernel_buffer_info +{ + meta_kernel_buffer_info(const buffer &buffer, + const std::string &id, + memory_object::address_space addr_space, + size_t i) + : m_mem(buffer.get()), + identifier(id), + address_space(addr_space), + index(i) + { + } + + cl_mem m_mem; + std::string identifier; + memory_object::address_space address_space; + size_t index; +}; + +class meta_kernel; + +template<class Type> +struct inject_type_impl +{ + void operator()(meta_kernel &) + { + // default implementation does nothing + } +}; + +#define BOOST_COMPUTE_META_KERNEL_DECLARE_SCALAR_TYPE_STREAM_OPERATOR(type) \ + meta_kernel& operator<<(const type &x) \ + { \ + m_source << x; \ + return *this; \ + } + +#define BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(type) \ + meta_kernel& operator<<(const type &x) \ + { \ + m_source << "(" << 
type_name<type>() << ")"; \ + m_source << "("; \ + for(size_t i = 0; i < vector_size<type>::value; i++){ \ + *this << lit(x[i]); \ + \ + if(i != vector_size<type>::value - 1){ \ + m_source << ","; \ + } \ + } \ + m_source << ")"; \ + return *this; \ + } + +#define BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(type) \ + BOOST_COMPUTE_META_KERNEL_DECLARE_SCALAR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(type, _)) \ + BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 2), _)) \ + BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 4), _)) \ + BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 8), _)) \ + BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(BOOST_PP_CAT(BOOST_PP_CAT(type, 16), _)) + +class meta_kernel +{ +public: + template<class T> + class argument + { + public: + argument(const std::string &name, size_t index) + : m_name(name), + m_index(index) + { + } + + const std::string &name() const + { + return m_name; + } + + size_t index() const + { + return m_index; + } + + private: + std::string m_name; + size_t m_index; + }; + + explicit meta_kernel(const std::string &name) + : m_name(name) + { + } + + meta_kernel(const meta_kernel &other) + { + m_source.str(other.m_source.str()); + } + + meta_kernel& operator=(const meta_kernel &other) + { + if(this != &other){ + m_source.str(other.m_source.str()); + } + + return *this; + } + + ~meta_kernel() + { + } + + std::string name() const + { + return m_name; + } + + std::string source() const + { + std::stringstream stream; + + // add pragmas + if(!m_pragmas.empty()){ + stream << m_pragmas << "\n"; + } + + // add macros + stream << "#define boost_pair_type(t1, t2) _pair_ ## t1 ## _ ## t2 ## _t\n"; + stream << "#define boost_pair_get(x, n) (n == 0 ? 
x.first ## x.second)\n"; + stream << "#define boost_make_pair(t1, x, t2, y) (boost_pair_type(t1, t2)) { x, y }\n"; + stream << "#define boost_tuple_get(x, n) (x.v ## n)\n"; + + // add type declaration source + stream << m_type_declaration_source.str() << "\n"; + + // add external function source + stream << m_external_function_source.str() << "\n"; + + // add kernel source + stream << "__kernel void " << m_name + << "(" << boost::join(m_args, ", ") << ")\n" + << "{\n" << m_source.str() << "\n}\n"; + + return stream.str(); + } + + kernel compile(const context &context, const std::string &options = std::string()) + { + // generate the program source + std::string source = this->source(); + + // generate cache key + std::string cache_key = "__boost_meta_kernel_" + + static_cast<std::string>(detail::sha1(source)); + + // load program cache + boost::shared_ptr<program_cache> cache = + program_cache::get_global_cache(context); + + // load (or build) program from cache + ::boost::compute::program program = + cache->get_or_build(cache_key, options, source, context); + + // create kernel + ::boost::compute::kernel kernel = program.create_kernel(name()); + + // bind stored args + for(size_t i = 0; i < m_stored_args.size(); i++){ + const detail::meta_kernel_stored_arg &arg = m_stored_args[i]; + + if(arg.m_size != 0){ + kernel.set_arg(i, arg.m_size, arg.m_value); + } + } + + // bind buffer args + for(size_t i = 0; i < m_stored_buffers.size(); i++){ + const detail::meta_kernel_buffer_info &bi = m_stored_buffers[i]; + + kernel.set_arg(bi.index, bi.m_mem); + } + + return kernel; + } + + template<class T> + size_t add_arg(const std::string &name) + { + std::stringstream stream; + stream << type<T>() << " " << name; + + // add argument to list + m_args.push_back(stream.str()); + + // return index + return m_args.size() - 1; + } + + template<class T> + size_t add_arg(memory_object::address_space address_space, + const std::string &name) + { + return 
add_arg_with_qualifiers<T>(address_space_prefix(address_space), name); + } + + template<class T> + void set_arg(size_t index, const T &value) + { + if(index >= m_stored_args.size()){ + m_stored_args.resize(index + 1); + } + + m_stored_args[index] = detail::meta_kernel_stored_arg(value); + } + + void set_arg(size_t index, const memory_object &mem) + { + set_arg<cl_mem>(index, mem.get()); + } + + void set_arg(size_t index, const image_sampler &sampler) + { + set_arg<cl_sampler>(index, cl_sampler(sampler)); + } + + template<class T> + size_t add_set_arg(const std::string &name, const T &value) + { + size_t index = add_arg<T>(name); + set_arg<T>(index, value); + return index; + } + + void add_extension_pragma(const std::string &extension, + const std::string &value = "enable") + { + m_pragmas += "#pragma OPENCL EXTENSION " + extension + " : " + value + "\n"; + } + + void add_extension_pragma(const std::string &extension, + const std::string &value) const + { + return const_cast<meta_kernel *>(this)->add_extension_pragma(extension, value); + } + + template<class T> + std::string type() const + { + std::stringstream stream; + + // const qualifier + if(boost::is_const<T>::value){ + stream << "const "; + } + + // volatile qualifier + if(boost::is_volatile<T>::value){ + stream << "volatile "; + } + + // type + typedef + typename boost::remove_cv< + typename boost::remove_pointer<T>::type + >::type Type; + stream << type_name<Type>(); + + // pointer + if(boost::is_pointer<T>::value){ + stream << "*"; + } + + // inject type pragmas and/or definitions + inject_type<Type>(); + + return stream.str(); + } + + template<class T> + std::string decl(const std::string &name) const + { + return type<T>() + " " + name; + } + + template<class T, class Expr> + std::string decl(const std::string &name, const Expr &init) const + { + meta_kernel tmp((std::string())); + tmp << tmp.decl<T>(name) << " = " << init; + return tmp.m_source.str(); + } + + template<class T> + 
detail::meta_kernel_variable<T> var(const std::string &name) const + { + type<T>(); + + return make_var<T>(name); + } + + template<class T> + detail::meta_kernel_literal<T> lit(const T &value) const + { + type<T>(); + + return detail::meta_kernel_literal<T>(value); + } + + template<class T> + detail::meta_kernel_variable<T> expr(const std::string &expr) const + { + type<T>(); + + return detail::meta_kernel_variable<T>(expr); + } + + // define stream operators for scalar and vector types + BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(char) + BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(uchar) + BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(short) + BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(ushort) + BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(int) + BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(uint) + BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(long) + BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(ulong) + BOOST_COMPUTE_META_KERNEL_DECLARE_TYPE_STREAM_OPERATORS(double) + + // define stream operators for float scalar and vector types + meta_kernel& operator<<(const float &x) + { + m_source << std::showpoint << x << 'f'; + return *this; + } + + BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float2_) + BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float4_) + BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float8_) + BOOST_COMPUTE_META_KERNEL_DECLARE_VECTOR_TYPE_STREAM_OPERATOR(float16_) + + // define stream operators for variable types + template<class T> + meta_kernel& operator<<(const meta_kernel_variable<T> &variable) + { + return *this << variable.name(); + } + + // define stream operators for literal types + template<class T> + meta_kernel& operator<<(const meta_kernel_literal<T> &literal) + { + return *this << literal.value(); + } + + meta_kernel& operator<<(const meta_kernel_literal<bool> &literal) + { + return *this << 
(literal.value() ? "true" : "false"); + } + + meta_kernel& operator<<(const meta_kernel_literal<char> &literal) + { + const char c = literal.value(); + + switch(c){ + // control characters + case '\0': + return *this << "'\\0'"; + case '\a': + return *this << "'\\a'"; + case '\b': + return *this << "'\\b'"; + case '\t': + return *this << "'\\t'"; + case '\n': + return *this << "'\\n'"; + case '\v': + return *this << "'\\v'"; + case '\f': + return *this << "'\\f'"; + case '\r': + return *this << "'\\r'"; + + // characters which need escaping + case '\"': + case '\'': + case '\?': + case '\\': + return *this << "'\\" << c << "'"; + + // all other characters + default: + return *this << "'" << c << "'"; + } + } + + meta_kernel& operator<<(const meta_kernel_literal<signed char> &literal) + { + return *this << lit<char>(literal.value()); + } + + meta_kernel& operator<<(const meta_kernel_literal<unsigned char> &literal) + { + return *this << uint_(literal.value()); + } + + // define stream operators for strings + meta_kernel& operator<<(char ch) + { + m_source << ch; + return *this; + } + + meta_kernel& operator<<(const char *string) + { + m_source << string; + return *this; + } + + meta_kernel& operator<<(const std::string &string) + { + m_source << string; + return *this; + } + + template<class T> + static detail::meta_kernel_variable<T> make_var(const std::string &name) + { + return detail::meta_kernel_variable<T>(name); + } + + template<class T> + static detail::meta_kernel_literal<T> make_lit(const T &value) + { + return detail::meta_kernel_literal<T>(value); + } + + template<class T> + static detail::meta_kernel_variable<T> make_expr(const std::string &expr) + { + return detail::meta_kernel_variable<T>(expr); + } + + event exec(command_queue &queue) + { + return exec_1d(queue, 0, 1); + } + + event exec_1d(command_queue &queue, + size_t global_work_offset, + size_t global_work_size) + { + const context &context = queue.get_context(); + + ::boost::compute::kernel 
kernel = compile(context); + + return queue.enqueue_1d_range_kernel( + kernel, + global_work_offset, + global_work_size, + 0 + ); + } + + event exec_1d(command_queue &queue, + size_t global_work_offset, + size_t global_work_size, + size_t local_work_size) + { + const context &context = queue.get_context(); + + ::boost::compute::kernel kernel = compile(context); + + return queue.enqueue_1d_range_kernel( + kernel, + global_work_offset, + global_work_size, + local_work_size + ); + } + + template<class T> + std::string get_buffer_identifier(const buffer &buffer, + const memory_object::address_space address_space = + memory_object::global_memory) + { + // check if we've already seen buffer + for(size_t i = 0; i < m_stored_buffers.size(); i++){ + const detail::meta_kernel_buffer_info &bi = m_stored_buffers[i]; + + if(bi.m_mem == buffer.get() && + bi.address_space == address_space){ + return bi.identifier; + } + } + + // create a new binding + std::string identifier = + "_buf" + lexical_cast<std::string>(m_stored_buffers.size()); + size_t index = add_arg<T *>(address_space, identifier); + + // store new buffer info + m_stored_buffers.push_back( + detail::meta_kernel_buffer_info(buffer, identifier, address_space, index)); + + return identifier; + } + + std::string get_image_identifier(const char *qualifiers, const image2d &image) + { + size_t index = add_arg_with_qualifiers<image2d>(qualifiers, "image"); + + set_arg(index, image); + + return "image"; + } + + std::string get_sampler_identifier(bool normalized_coords, + cl_addressing_mode addressing_mode, + cl_filter_mode filter_mode) + { + (void) normalized_coords; + (void) addressing_mode; + (void) filter_mode; + + m_pragmas += "const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |\n" + " CLK_ADDRESS_NONE |\n" + " CLK_FILTER_NEAREST;\n"; + + return "sampler"; + } + + template<class Expr> + static std::string expr_to_string(const Expr &expr) + { + meta_kernel tmp((std::string())); + tmp << expr; + return 
tmp.m_source.str(); + } + + template<class Predicate> + detail::invoked_function<bool, boost::tuple<Predicate> > if_(Predicate pred) const + { + return detail::invoked_function<bool, boost::tuple<Predicate> >( + "if", std::string(), boost::make_tuple(pred) + ); + } + + template<class Predicate> + detail::invoked_function<bool, boost::tuple<Predicate> > else_if_(Predicate pred) const + { + return detail::invoked_function<bool, boost::tuple<Predicate> >( + "else if", std::string(), boost::make_tuple(pred) + ); + } + + detail::meta_kernel_variable<cl_uint> get_global_id(size_t dim) const + { + return expr<cl_uint>("get_global_id(" + lexical_cast<std::string>(dim) + ")"); + } + + void add_function(const std::string &name, const std::string &source) + { + if(m_external_function_names.count(name)){ + return; + } + + m_external_function_names.insert(name); + m_external_function_source << source << "\n"; + } + + void add_function(const std::string &name, + const std::string &source, + const std::map<std::string, std::string> &definitions) + { + typedef std::map<std::string, std::string>::const_iterator iter; + + std::stringstream s; + + // add #define's + for(iter i = definitions.begin(); i != definitions.end(); i++){ + s << "#define " << i->first; + if(!i->second.empty()){ + s << " " << i->second; + } + s << "\n"; + } + + s << source << "\n"; + + // add #undef's + for(iter i = definitions.begin(); i != definitions.end(); i++){ + s << "#undef " << i->first << "\n"; + } + + add_function(name, s.str()); + } + + template<class Type> + void add_type_declaration(const std::string &declaration) + { + const char *name = type_name<Type>(); + + // check if the type has already been declared + std::string source = m_type_declaration_source.str(); + if(source.find(name) != std::string::npos){ + return; + } + + m_type_declaration_source << declaration; + } + + template<class Type> + void inject_type() const + { + inject_type_impl<Type>()(const_cast<meta_kernel &>(*this)); + } + + // 
the insert_function_call() method inserts a call to a function with + // the given name tuple of argument values. + template<class ArgTuple> + void insert_function_call(const std::string &name, const ArgTuple &args) + { + *this << name << '('; + insert_function_call_args(args); + *this << ')'; + } + + // the insert_function_call_args() method takes a tuple of argument values + // and inserts them into the source string with a comma in-between each. + // this is useful for creating function calls given a tuple of values. + void insert_function_call_args(const boost::tuple<>&) + { + } + + #define BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARG_TYPE(z, n, unused) \ + inject_type<BOOST_PP_CAT(T, n)>(); + + #define BOOST_COMPUTE_META_KERNEL_STREAM_FUNCTION_ARG(z, n, unused) \ + << boost::get<BOOST_PP_DEC(n)>(args) << ", " + + #define BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARGS(z, n, unused) \ + template<BOOST_PP_ENUM_PARAMS(n, class T)> \ + void insert_function_call_args( \ + const boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> &args \ + ) \ + { \ + BOOST_PP_REPEAT_FROM_TO( \ + 0, n, BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARG_TYPE, ~ \ + ) \ + *this \ + BOOST_PP_REPEAT_FROM_TO( \ + 1, n, BOOST_COMPUTE_META_KERNEL_STREAM_FUNCTION_ARG, ~ \ + ) \ + << boost::get<BOOST_PP_DEC(n)>(args); \ + } + + BOOST_PP_REPEAT_FROM_TO( + 1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARGS, ~ + ) + + #undef BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARG_TYPE + #undef BOOST_COMPUTE_META_KERNEL_STREAM_FUNCTION_ARG + #undef BOOST_COMPUTE_META_KERNEL_INSERT_FUNCTION_ARGS + + static const char* address_space_prefix(const memory_object::address_space value) + { + switch(value){ + case memory_object::global_memory: return "__global"; + case memory_object::local_memory: return "__local"; + case memory_object::private_memory: return "__private"; + case memory_object::constant_memory: return "__constant"; + }; + + return 0; // unreachable + } + +private: + template<class T> + 
size_t add_arg_with_qualifiers(const char *qualifiers, const std::string &name) + { + size_t index = add_arg<T>(name); + + // update argument type declaration with qualifiers + std::stringstream s; + s << qualifiers << " " << m_args[index]; + m_args[index] = s.str(); + + return index; + } + +private: + std::string m_name; + std::stringstream m_source; + std::stringstream m_external_function_source; + std::stringstream m_type_declaration_source; + std::set<std::string> m_external_function_names; + std::vector<std::string> m_args; + std::string m_pragmas; + std::vector<detail::meta_kernel_stored_arg> m_stored_args; + std::vector<detail::meta_kernel_buffer_info> m_stored_buffers; +}; + +template<class ResultType, class ArgTuple> +inline meta_kernel& +operator<<(meta_kernel &kernel, const invoked_function<ResultType, ArgTuple> &expr) +{ + if(!expr.source().empty()){ + kernel.add_function(expr.name(), expr.source(), expr.definitions()); + } + + kernel.insert_function_call(expr.name(), expr.args()); + + return kernel; +} + +template<class ResultType, class ArgTuple, class CaptureTuple> +inline meta_kernel& +operator<<(meta_kernel &kernel, + const invoked_closure<ResultType, ArgTuple, CaptureTuple> &expr) +{ + if(!expr.source().empty()){ + kernel.add_function(expr.name(), expr.source(), expr.definitions()); + } + + kernel << expr.name() << '('; + kernel.insert_function_call_args(expr.args()); + kernel << ", "; + kernel.insert_function_call_args(expr.capture()); + kernel << ')'; + + return kernel; +} + +template<class Arg1, class Arg2, class Result> +inline meta_kernel& operator<<(meta_kernel &kernel, + const invoked_binary_operator<Arg1, + Arg2, + Result> &expr) +{ + return kernel << "((" << expr.arg1() << ")" + << expr.op() + << "(" << expr.arg2() << "))"; +} + +template<class T, class IndexExpr> +inline meta_kernel& operator<<(meta_kernel &kernel, + const detail::device_ptr_index_expr<T, IndexExpr> &expr) +{ + if(expr.m_index == 0){ + return kernel << + 
// get<N>() for vector types
//
// Streams a component access on expr.m_arg into the kernel source. The
// component is written as ".s" followed by the index: a decimal digit for
// N in [0, 9] and a letter 'a'..'f' for N in [10, 15]. Indices of 16 or more
// are rejected at compile time by the static assertion.
template<size_t N, class Arg, class T>
inline meta_kernel& operator<<(meta_kernel &kernel,
                               const invoked_get<N, Arg, T> &expr)
{
    BOOST_STATIC_ASSERT(N < 16);

    if(N < 10){
        // single decimal digit suffix
        return kernel << expr.m_arg << ".s" << uint_(N);
    }
    else if(N < 16){
        // letter suffix; the arithmetic is evaluated for every N the template
        // is instantiated with, so MSVC warning 4307 is silenced around it
#ifdef _MSC_VER
#  pragma warning(push)
#  pragma warning(disable: 4307)
#endif
        return kernel << expr.m_arg << ".s" << char('a' + (N - 10));
#ifdef _MSC_VER
#  pragma warning(pop)
#endif
    }

    // not reached for any N accepted by the static assertion above
    return kernel;
}
<< expr.m_field; +} + +template<class T, class Arg> +inline meta_kernel& operator<<(meta_kernel &k, + const invoked_as<T, Arg> &expr) +{ + return k << "as_" << type_name<T>() << "(" << expr.m_arg << ")"; +} + +template<class T, class Arg> +inline meta_kernel& operator<<(meta_kernel &k, + const invoked_convert<T, Arg> &expr) +{ + return k << "convert_" << type_name<T>() << "(" << expr.m_arg << ")"; +} + +template<class T, class Arg> +inline meta_kernel& operator<<(meta_kernel &k, + const invoked_identity<T, Arg> &expr) +{ + return k << expr.m_arg; +} + +template<> +struct inject_type_impl<double_> +{ + void operator()(meta_kernel &kernel) + { + kernel.add_extension_pragma("cl_khr_fp64", "enable"); + } +}; + +template<class Scalar, size_t N> +struct inject_type_impl<vector_type<Scalar, N> > +{ + void operator()(meta_kernel &kernel) + { + kernel.inject_type<Scalar>(); + } +}; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_DETAIL_META_KERNEL_HPP diff --git a/boost/compute/detail/mpl_vector_to_tuple.hpp b/boost/compute/detail/mpl_vector_to_tuple.hpp new file mode 100644 index 0000000000..292a6e36e1 --- /dev/null +++ b/boost/compute/detail/mpl_vector_to_tuple.hpp @@ -0,0 +1,65 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DETAIL_MPL_VECTOR_TO_TUPLE_HPP +#define BOOST_COMPUTE_DETAIL_MPL_VECTOR_TO_TUPLE_HPP + +#include <boost/mpl/copy.hpp> +#include <boost/mpl/vector.hpp> +#include <boost/tuple/tuple.hpp> +#include <boost/fusion/include/mpl.hpp> +#include <boost/fusion/adapted/boost_tuple.hpp> +#include <boost/preprocessor/repetition.hpp> + +#include <boost/compute/config.hpp> + +namespace boost { +namespace compute { +namespace detail { + +namespace mpl = boost::mpl; + +template<class Vector, size_t N> +struct mpl_vector_to_tuple_impl; + +#define BOOST_COMPUTE_PRINT_ELEM(z, n, unused) \ + typename mpl::at_c<Vector, n>::type + +#define BOOST_COMPUTE_VEC2TUP(z, n, unused) \ +template<class Vector> \ +struct mpl_vector_to_tuple_impl<Vector, n> \ +{ \ + typedef typename \ + boost::tuple< \ + BOOST_PP_ENUM(n, BOOST_COMPUTE_PRINT_ELEM, ~) \ + > type; \ +}; + +BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_VEC2TUP, ~) + +#undef BOOST_COMPUTE_VEC2TUP +#undef BOOST_COMPUTE_PRINT_ELEM + +// meta-function which converts a mpl::vector to a boost::tuple +template<class Vector> +struct mpl_vector_to_tuple +{ + typedef typename + mpl_vector_to_tuple_impl< + Vector, + mpl::size<Vector>::value + >::type type; +}; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_DETAIL_MPL_VECTOR_TO_TUPLE_HPP diff --git a/boost/compute/detail/nvidia_compute_capability.hpp b/boost/compute/detail/nvidia_compute_capability.hpp new file mode 100644 index 0000000000..3f859562bd --- /dev/null +++ b/boost/compute/detail/nvidia_compute_capability.hpp @@ -0,0 +1,60 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// 
http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DETAIL_NVIDIA_COMPUTE_CAPABILITY_HPP +#define BOOST_COMPUTE_DETAIL_NVIDIA_COMPUTE_CAPABILITY_HPP + +#include <boost/compute/device.hpp> + +#ifdef BOOST_COMPUTE_HAVE_HDR_CL_EXT + #include <CL/cl_ext.h> +#endif + +namespace boost { +namespace compute { +namespace detail { + +#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV + #define BOOST_COMPUTE_CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV +#else + #define BOOST_COMPUTE_CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 +#endif + +#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV + #define BOOST_COMPUTE_CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV +#else + #define BOOST_COMPUTE_CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 +#endif + +inline void get_nvidia_compute_capability(const device &device, int &major, int &minor) +{ + if(!device.supports_extension("cl_nv_device_attribute_query")){ + major = minor = 0; + return; + } + + major = device.get_info<uint_>(BOOST_COMPUTE_CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV); + minor = device.get_info<uint_>(BOOST_COMPUTE_CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV); +} + +inline bool check_nvidia_compute_capability(const device &device, int major, int minor) +{ + int actual_major, actual_minor; + get_nvidia_compute_capability(device, actual_major, actual_minor); + + return actual_major > major || + (actual_major == major && actual_minor >= minor); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_DETAIL_NVIDIA_COMPUTE_CAPABILITY_HPP diff --git a/boost/compute/detail/parameter_cache.hpp b/boost/compute/detail/parameter_cache.hpp new file mode 100644 index 0000000000..2a856311e1 --- /dev/null +++ b/boost/compute/detail/parameter_cache.hpp @@ -0,0 +1,215 @@ 
+//---------------------------------------------------------------------------// +// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DETAIL_PARAMETER_CACHE_HPP +#define BOOST_COMPUTE_DETAIL_PARAMETER_CACHE_HPP + +#include <algorithm> +#include <string> + +#include <boost/shared_ptr.hpp> +#include <boost/make_shared.hpp> +#include <boost/noncopyable.hpp> + +#include <boost/compute/config.hpp> +#include <boost/compute/device.hpp> +#include <boost/compute/detail/global_static.hpp> +#include <boost/compute/version.hpp> + +#ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE +#include <boost/algorithm/string/trim.hpp> +#include <boost/compute/detail/path.hpp> +#include <boost/property_tree/ptree.hpp> +#include <boost/property_tree/json_parser.hpp> +#endif // BOOST_COMPUTE_USE_OFFLINE_CACHE + +namespace boost { +namespace compute { +namespace detail { + +class parameter_cache : boost::noncopyable +{ +public: + parameter_cache(const device &device) + : m_dirty(false), + m_device_name(device.name()) + { + #ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE + // get offline cache file name (e.g. /home/user/.boost_compute/tune/device.json) + m_file_name = make_file_name(); + + // load parameters from offline cache file (if it exists) + if(boost::filesystem::exists(m_file_name)){ + read_from_disk(); + } + #endif // BOOST_COMPUTE_USE_OFFLINE_CACHE + } + + ~parameter_cache() + { + #ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE + write_to_disk(); + #endif // BOOST_COMPUTE_USE_OFFLINE_CACHE + } + + void set(const std::string &object, const std::string ¶meter, uint_ value) + { + m_cache[std::make_pair(object, parameter)] = value; + + // set the dirty flag to true. 
this will cause the updated parameters + // to be stored to disk. + m_dirty = true; + } + + uint_ get(const std::string &object, const std::string ¶meter, uint_ default_value) + { + std::map<std::pair<std::string, std::string>, uint_>::iterator + iter = m_cache.find(std::make_pair(object, parameter)); + if(iter != m_cache.end()){ + return iter->second; + } + else { + return default_value; + } + } + + static boost::shared_ptr<parameter_cache> get_global_cache(const device &device) + { + // device name -> parameter cache + typedef std::map<std::string, boost::shared_ptr<parameter_cache> > cache_map; + + BOOST_COMPUTE_DETAIL_GLOBAL_STATIC(cache_map, caches, ((std::less<std::string>()))); + + cache_map::iterator iter = caches.find(device.name()); + if(iter == caches.end()){ + boost::shared_ptr<parameter_cache> cache = + boost::make_shared<parameter_cache>(device); + + caches.insert(iter, std::make_pair(device.name(), cache)); + + return cache; + } + else { + return iter->second; + } + } + +private: +#ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE + // returns a string containing a cannoical device name + static std::string cannonical_device_name(std::string name) + { + boost::algorithm::trim(name); + std::replace(name.begin(), name.end(), ' ', '_'); + std::replace(name.begin(), name.end(), '(', '_'); + std::replace(name.begin(), name.end(), ')', '_'); + return name; + } + + // returns the boost.compute version string + static std::string version_string() + { + char buf[32]; + std::snprintf(buf, sizeof(buf), "%d.%d.%d", BOOST_COMPUTE_VERSION_MAJOR, + BOOST_COMPUTE_VERSION_MINOR, + BOOST_COMPUTE_VERSION_PATCH); + return buf; + } + + // returns the file path for the cached parameters + std::string make_file_name() const + { + return detail::parameter_cache_path(true) + cannonical_device_name(m_device_name) + ".json"; + } + + // store current parameters to disk + void write_to_disk() + { + BOOST_ASSERT(!m_file_name.empty()); + + if(m_dirty){ + // save current parameters to disk + 
boost::property_tree::ptree pt; + pt.put("header.device", m_device_name); + pt.put("header.version", version_string()); + typedef std::map<std::pair<std::string, std::string>, uint_> map_type; + for(map_type::const_iterator iter = m_cache.begin(); iter != m_cache.end(); ++iter){ + const std::pair<std::string, std::string> &key = iter->first; + pt.add(key.first + "." + key.second, iter->second); + } + write_json(m_file_name, pt); + + m_dirty = false; + } + } + + // load stored parameters from disk + void read_from_disk() + { + BOOST_ASSERT(!m_file_name.empty()); + + m_cache.clear(); + + boost::property_tree::ptree pt; + try { + read_json(m_file_name, pt); + } + catch(boost::property_tree::json_parser::json_parser_error &e){ + // no saved cache file, ignore + return; + } + + std::string stored_device; + try { + stored_device = pt.get<std::string>("header.device"); + } + catch(boost::property_tree::ptree_bad_path&){ + return; + } + + std::string stored_version; + try { + stored_version = pt.get<std::string>("header.version"); + } + catch(boost::property_tree::ptree_bad_path&){ + return; + } + + if(stored_device == m_device_name && stored_version == version_string()){ + typedef boost::property_tree::ptree::const_iterator pt_iter; + for(pt_iter iter = pt.begin(); iter != pt.end(); ++iter){ + if(iter->first == "header"){ + // skip header + continue; + } + + boost::property_tree::ptree child_pt = pt.get_child(iter->first); + for(pt_iter child_iter = child_pt.begin(); child_iter != child_pt.end(); ++child_iter){ + set(iter->first, child_iter->first, boost::lexical_cast<uint_>(child_iter->second.data())); + } + } + } + + m_dirty = false; + } +#endif // BOOST_COMPUTE_USE_OFFLINE_CACHE + +private: + bool m_dirty; + std::string m_device_name; + std::string m_file_name; + std::map<std::pair<std::string, std::string>, uint_> m_cache; +}; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_DETAIL_PARAMETER_CACHE_HPP diff 
--git a/boost/compute/detail/path.hpp b/boost/compute/detail/path.hpp new file mode 100644 index 0000000000..ec8760eaf9 --- /dev/null +++ b/boost/compute/detail/path.hpp @@ -0,0 +1,73 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DETAIL_PATH_HPP +#define BOOST_COMPUTE_DETAIL_PATH_HPP + +#include <boost/filesystem/path.hpp> +#include <boost/filesystem/operations.hpp> +#include <boost/compute/detail/getenv.hpp> + +namespace boost { +namespace compute { +namespace detail { + +// Path delimiter symbol for the current OS. +static const std::string& path_delim() +{ + static const std::string delim = + boost::filesystem::path("/").make_preferred().string(); + return delim; +} + +// Path to appdata folder. +inline const std::string& appdata_path() +{ + #ifdef WIN32 + static const std::string appdata = detail::getenv("APPDATA") + + path_delim() + "boost_compute"; + #else + static const std::string appdata = detail::getenv("HOME") + + path_delim() + ".boost_compute"; + #endif + return appdata; +} + +// Path to cached binaries. +inline std::string program_binary_path(const std::string &hash, bool create = false) +{ + std::string dir = detail::appdata_path() + path_delim() + + hash.substr(0, 2) + path_delim() + + hash.substr(2); + + if(create && !boost::filesystem::exists(dir)){ + boost::filesystem::create_directories(dir); + } + + return dir + path_delim(); +} + +// Path to parameter caches. 
+inline std::string parameter_cache_path(bool create = false) +{ + const static std::string dir = appdata_path() + path_delim() + "tune"; + + if(create && !boost::filesystem::exists(dir)){ + boost::filesystem::create_directories(dir); + } + + return dir + path_delim(); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_DETAIL_PATH_HPP diff --git a/boost/compute/detail/print_range.hpp b/boost/compute/detail/print_range.hpp new file mode 100644 index 0000000000..bfe02f6828 --- /dev/null +++ b/boost/compute/detail/print_range.hpp @@ -0,0 +1,82 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DETAIL_PRINT_RANGE_HPP +#define BOOST_COMPUTE_DETAIL_PRINT_RANGE_HPP + +#include <vector> +#include <iostream> +#include <iterator> + +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/is_buffer_iterator.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class InputIterator> +inline void print_range(InputIterator first, + InputIterator last, + command_queue &queue, + typename boost::enable_if< + is_buffer_iterator<InputIterator> + >::type* = 0) +{ + typedef typename + std::iterator_traits<InputIterator>::value_type + value_type; + + const size_t size = iterator_range_size(first, last); + + // copy values to temporary vector on the host + std::vector<value_type> tmp(size); + ::boost::compute::copy(first, last, tmp.begin(), queue); + + // print 
values + std::cout << "[ "; + for(size_t i = 0; i < size; i++){ + std::cout << tmp[i]; + if(i != size - 1){ + std::cout << ", "; + } + } + std::cout << " ]" << std::endl; +} + +template<class InputIterator> +inline void print_range(InputIterator first, + InputIterator last, + command_queue &queue, + typename boost::enable_if_c< + !is_buffer_iterator<InputIterator>::value + >::type* = 0) +{ + typedef typename + std::iterator_traits<InputIterator>::value_type + value_type; + + const context &context = queue.get_context(); + const size_t size = iterator_range_size(first, last); + + // copy values to temporary vector on the device + ::boost::compute::vector<value_type> tmp(size, context); + ::boost::compute::copy(first, last, tmp.begin(), queue); + + print_range(tmp.begin(), tmp.end(), queue); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_DETAIL_PRINT_RANGE_HPP diff --git a/boost/compute/detail/read_write_single_value.hpp b/boost/compute/detail/read_write_single_value.hpp new file mode 100644 index 0000000000..fde40d946c --- /dev/null +++ b/boost/compute/detail/read_write_single_value.hpp @@ -0,0 +1,77 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DETAIL_READ_WRITE_SINGLE_VALUE_HPP +#define BOOST_COMPUTE_DETAIL_READ_WRITE_SINGLE_VALUE_HPP + +#include <boost/throw_exception.hpp> + +#include <boost/compute/buffer.hpp> +#include <boost/compute/exception.hpp> +#include <boost/compute/command_queue.hpp> + +namespace boost { +namespace compute { +namespace detail { + +// reads and returns a single value at index in the buffer +template<class T> +inline T read_single_value(const buffer &buffer, + size_t index, + command_queue &queue) +{ + BOOST_ASSERT(index < buffer.size() / sizeof(T)); + BOOST_ASSERT(buffer.get_context() == queue.get_context()); + + T value; + queue.enqueue_read_buffer(buffer, + sizeof(T) * index, + sizeof(T), + &value); + return value; +} + +// reads and returns a the first value in the buffer +template<class T> +inline T read_single_value(const buffer &buffer, command_queue &queue) +{ + return read_single_value<T>(buffer, 0, queue); +} + +// writes a single value at index to the buffer +template<class T> +inline void write_single_value(const T &value, + const buffer &buffer, + size_t index, + command_queue &queue) +{ + BOOST_ASSERT(index < buffer.size() / sizeof(T)); + BOOST_ASSERT(buffer.get_context() == queue.get_context()); + + queue.enqueue_write_buffer(buffer, + index * sizeof(T), + sizeof(T), + &value); +} + +// writes value to the first location in buffer +template<class T> +inline void write_single_value(const T &value, + const buffer &buffer, + command_queue &queue) +{ + write_single_value<T>(value, buffer, 0, queue); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_DETAIL_READ_WRITE_SINGLE_VALUE_HPP diff --git a/boost/compute/detail/sha1.hpp b/boost/compute/detail/sha1.hpp new file mode 100644 index 0000000000..5685fa4407 --- /dev/null +++ b/boost/compute/detail/sha1.hpp @@ -0,0 +1,53 @@ 
+//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DETAIL_SHA1_HPP +#define BOOST_COMPUTE_DETAIL_SHA1_HPP + +#include <sstream> +#include <iomanip> +#include <boost/uuid/sha1.hpp> + +namespace boost { +namespace compute { +namespace detail { + +// Accumulates SHA1 hash of the passed strings. +class sha1 { + public: + sha1(const std::string &s = "") { + if (!s.empty()) this->process(s); + } + + sha1& process(const std::string &s) { + h.process_bytes(s.c_str(), s.size()); + return *this; + } + + operator std::string() { + unsigned int digest[5]; + h.get_digest(digest); + + std::ostringstream buf; + for(int i = 0; i < 5; ++i) + buf << std::hex << std::setfill('0') << std::setw(8) << digest[i]; + + return buf.str(); + } + private: + boost::uuids::detail::sha1 h; +}; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + + +#endif // BOOST_COMPUTE_DETAIL_SHA1_HPP diff --git a/boost/compute/detail/variadic_macros.hpp b/boost/compute/detail/variadic_macros.hpp new file mode 100644 index 0000000000..60f44bd1a8 --- /dev/null +++ b/boost/compute/detail/variadic_macros.hpp @@ -0,0 +1,35 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DETAIL_VARIDAIC_MACROS_HPP +#define BOOST_COMPUTE_DETAIL_VARIDAIC_MACROS_HPP + +#include <boost/preprocessor/cat.hpp> +#include <boost/preprocessor/config/config.hpp> +#include <boost/preprocessor/tuple/to_seq.hpp> + +#if BOOST_PP_VARIADICS == 1 +# include <boost/preprocessor/variadic/size.hpp> +#endif + +#ifdef BOOST_PP_VARIADIC_SIZE +# define BOOST_COMPUTE_PP_VARIADIC_SIZE BOOST_PP_VARIADIC_SIZE +#else +# define BOOST_COMPUTE_PP_VARIADIC_SIZE(...) BOOST_COMPUTE_PP_VARIADIC_SIZE_I(__VA_ARGS__, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,) +# define BOOST_COMPUTE_PP_VARIADIC_SIZE_I(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35, e36, e37, e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52, e53, e54, e55, e56, e57, e58, e59, e60, e61, e62, e63, size, ...) 
size +#endif + +#define BOOST_COMPUTE_PP_TUPLE_SIZE(tuple) \ + BOOST_COMPUTE_PP_VARIADIC_SIZE tuple + +#define BOOST_COMPUTE_PP_TUPLE_TO_SEQ(tuple) \ + BOOST_PP_TUPLE_TO_SEQ(BOOST_COMPUTE_PP_TUPLE_SIZE(tuple), tuple) + +#endif // BOOST_COMPUTE_DETAIL_VARIDAIC_MACROS_HPP diff --git a/boost/compute/detail/vendor.hpp b/boost/compute/detail/vendor.hpp new file mode 100644 index 0000000000..0aa9c9c0d4 --- /dev/null +++ b/boost/compute/detail/vendor.hpp @@ -0,0 +1,38 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DETAIL_VENDOR_HPP +#define BOOST_COMPUTE_DETAIL_VENDOR_HPP + +#include <boost/compute/device.hpp> +#include <boost/compute/platform.hpp> + +namespace boost { +namespace compute { +namespace detail { + +// returns true if the device is an nvidia gpu +inline bool is_nvidia_device(const device &device) +{ + std::string nvidia("NVIDIA"); + return device.vendor().compare(0, nvidia.size(), nvidia) == 0; +} + +// returns true if the device is an amd cpu or gpu +inline bool is_amd_device(const device &device) +{ + return device.platform().vendor() == "Advanced Micro Devices, Inc."; +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_DETAIL_VENDOR_HPP diff --git a/boost/compute/detail/work_size.hpp b/boost/compute/detail/work_size.hpp new file mode 100644 index 0000000000..552d797b8b --- /dev/null +++ b/boost/compute/detail/work_size.hpp @@ -0,0 +1,37 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz 
// Given a total number of values (count), a number of values to
// process per thread (vpt), and a number of threads to execute per
// block (tpb), this function returns the global work size to be
// passed to clEnqueueNDRangeKernel() for a 1D algorithm.
//
// The result is ceil(count / vpt) rounded up to the next multiple of tpb.
// Both vpt and tpb must be non-zero.
inline size_t calculate_work_size(size_t count, size_t vpt, size_t tpb)
{
    // Integer ceiling division. Going through float would round counts
    // above 2^24 and could yield fewer threads than there are values;
    // the quotient/remainder form also cannot overflow the way
    // (count + vpt - 1) / vpt can.
    size_t work_size = count / vpt + (count % vpt != 0 ? 1 : 0);

    // pad up to a whole number of blocks
    const size_t remainder = work_size % tpb;
    if(remainder != 0){
        work_size += tpb - remainder;
    }

    return work_size;
}
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_DEVICE_HPP +#define BOOST_COMPUTE_DEVICE_HPP + +#include <algorithm> +#include <string> +#include <vector> + +#include <boost/algorithm/string/split.hpp> +#include <boost/algorithm/string/classification.hpp> + +#include <boost/compute/config.hpp> +#include <boost/compute/exception.hpp> +#include <boost/compute/types/fundamental.hpp> +#include <boost/compute/detail/get_object_info.hpp> +#include <boost/compute/detail/assert_cl_success.hpp> + +namespace boost { +namespace compute { + +class platform; + +/// \class device +/// \brief A compute device. +/// +/// Typical compute devices include GPUs and multi-core CPUs. A list +/// of all compute devices available on a platform can be obtained +/// via the platform::devices() method. +/// +/// The default compute device for the system can be obtained with +/// the system::default_device() method. For example: +/// +/// \snippet test/test_device.cpp default_gpu +/// +/// \see platform, context, command_queue +class device +{ +public: + enum type { + cpu = CL_DEVICE_TYPE_CPU, + gpu = CL_DEVICE_TYPE_GPU, + accelerator = CL_DEVICE_TYPE_ACCELERATOR + }; + + /// Creates a null device object. + device() + : m_id(0) + { + } + + /// Creates a new device object for \p id. If \p retain is \c true, + /// the reference count for the device will be incremented. + explicit device(cl_device_id id, bool retain = true) + : m_id(id) + { + #ifdef CL_VERSION_1_2 + if(m_id && retain && is_subdevice()){ + clRetainDevice(m_id); + } + #else + (void) retain; + #endif + } + + /// Creates a new device object as a copy of \p other. + device(const device &other) + : m_id(other.m_id) + { + #ifdef CL_VERSION_1_2 + if(m_id && is_subdevice()){ + clRetainDevice(m_id); + } + #endif + } + + /// Copies the device from \p other to \c *this. 
+ device& operator=(const device &other) + { + if(this != &other){ + #ifdef CL_VERSION_1_2 + if(m_id && is_subdevice()){ + clReleaseDevice(m_id); + } + #endif + + m_id = other.m_id; + + #ifdef CL_VERSION_1_2 + if(m_id && is_subdevice()){ + clRetainDevice(m_id); + } + #endif + } + + return *this; + } + + #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES + /// Move-constructs a new device object from \p other. + device(device&& other) BOOST_NOEXCEPT + : m_id(other.m_id) + { + other.m_id = 0; + } + + /// Move-assigns the device from \p other to \c *this. + device& operator=(device&& other) BOOST_NOEXCEPT + { + #ifdef CL_VERSION_1_2 + if(m_id && is_subdevice()){ + clReleaseDevice(m_id); + } + #endif + + m_id = other.m_id; + other.m_id = 0; + + return *this; + } + #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES + + /// Destroys the device object. + ~device() + { + #ifdef CL_VERSION_1_2 + if(m_id && is_subdevice()){ + BOOST_COMPUTE_ASSERT_CL_SUCCESS( + clReleaseDevice(m_id) + ); + } + #endif + } + + /// Returns the ID of the device. + cl_device_id id() const + { + return m_id; + } + + /// Returns a reference to the underlying OpenCL device id. + cl_device_id& get() const + { + return const_cast<cl_device_id&>(m_id); + } + + /// Returns the type of the device. + cl_device_type type() const + { + return get_info<cl_device_type>(CL_DEVICE_TYPE); + } + + #ifdef BOOST_COMPUTE_DOXYGEN_INVOKED + /// Returns the platform for the device. + platform platform() const; + #else + boost::compute::platform platform() const; + #endif + + /// Returns the name of the device. + std::string name() const + { + return get_info<std::string>(CL_DEVICE_NAME); + } + + /// Returns the name of the vendor for the device. + std::string vendor() const + { + return get_info<std::string>(CL_DEVICE_VENDOR); + } + + /// Returns the device profile string. + std::string profile() const + { + return get_info<std::string>(CL_DEVICE_PROFILE); + } + + /// Returns the device version string. 
+ std::string version() const + { + return get_info<std::string>(CL_DEVICE_VERSION); + } + + /// Returns the driver version string. + std::string driver_version() const + { + return get_info<std::string>(CL_DRIVER_VERSION); + } + + /// Returns a list of extensions supported by the device. + std::vector<std::string> extensions() const + { + std::string extensions_string = + get_info<std::string>(CL_DEVICE_EXTENSIONS); + std::vector<std::string> extensions_vector; + boost::split(extensions_vector, + extensions_string, + boost::is_any_of("\t "), + boost::token_compress_on); + return extensions_vector; + } + + /// Returns \c true if the device supports the extension with + /// \p name. + bool supports_extension(const std::string &name) const + { + const std::vector<std::string> extensions = this->extensions(); + + return std::find( + extensions.begin(), extensions.end(), name) != extensions.end(); + } + + /// Returns the number of address bits. + uint_ address_bits() const + { + return get_info<uint_>(CL_DEVICE_ADDRESS_BITS); + } + + /// Returns the global memory size in bytes. + ulong_ global_memory_size() const + { + return get_info<ulong_>(CL_DEVICE_GLOBAL_MEM_SIZE); + } + + /// Returns the local memory size in bytes. + ulong_ local_memory_size() const + { + return get_info<ulong_>(CL_DEVICE_LOCAL_MEM_SIZE); + } + + /// Returns the clock frequency for the device's compute units. + uint_ clock_frequency() const + { + return get_info<uint_>(CL_DEVICE_MAX_CLOCK_FREQUENCY); + } + + /// Returns the number of compute units in the device. 
+ uint_ compute_units() const + { + return get_info<uint_>(CL_DEVICE_MAX_COMPUTE_UNITS); + } + + /// \internal_ + ulong_ max_memory_alloc_size() const + { + return get_info<ulong_>(CL_DEVICE_MAX_MEM_ALLOC_SIZE); + } + + /// \internal_ + size_t max_work_group_size() const + { + return get_info<size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE); + } + + /// \internal_ + uint_ max_work_item_dimensions() const + { + return get_info<uint_>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS); + } + + /// Returns the preferred vector width for type \c T. + template<class T> + uint_ preferred_vector_width() const + { + return 0; + } + + /// Returns the profiling timer resolution in nanoseconds. + size_t profiling_timer_resolution() const + { + return get_info<size_t>(CL_DEVICE_PROFILING_TIMER_RESOLUTION); + } + + /// Returns \c true if the device is a sub-device. + bool is_subdevice() const + { + #if defined(CL_VERSION_1_2) + try { + return get_info<cl_device_id>(CL_DEVICE_PARENT_DEVICE) != 0; + } + catch(opencl_error&){ + // the get_info() call above will throw if the device's opencl version + // is less than 1.2 (in which case it can't be a sub-device). + return false; + } + #else + return false; + #endif + } + + /// Returns information about the device. 
+ /// + /// For example, to get the number of compute units: + /// \code + /// device.get_info<cl_uint>(CL_DEVICE_MAX_COMPUTE_UNITS); + /// \endcode + /// + /// Alternatively, the template-specialized version can be used which + /// automatically determines the result type: + /// \code + /// device.get_info<CL_DEVICE_MAX_COMPUTE_UNITS>(); + /// \endcode + /// + /// \see_opencl_ref{clGetDeviceInfo} + template<class T> + T get_info(cl_device_info info) const + { + return detail::get_object_info<T>(clGetDeviceInfo, m_id, info); + } + + /// \overload + template<int Enum> + typename detail::get_object_info_type<device, Enum>::type + get_info() const; + + #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + /// Partitions the device into multiple sub-devices according to + /// \p properties. + /// + /// \opencl_version_warning{1,2} + std::vector<device> + partition(const cl_device_partition_property *properties) const + { + // get sub-device count + uint_ count = 0; + int_ ret = clCreateSubDevices(m_id, properties, 0, 0, &count); + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + // get sub-device ids + std::vector<cl_device_id> ids(count); + ret = clCreateSubDevices(m_id, properties, count, &ids[0], 0); + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + // convert ids to device objects + std::vector<device> devices(count); + for(size_t i = 0; i < count; i++){ + devices[i] = device(ids[i], false); + } + + return devices; + } + + /// \opencl_version_warning{1,2} + std::vector<device> partition_equally(size_t count) const + { + cl_device_partition_property properties[] = { + CL_DEVICE_PARTITION_EQUALLY, + static_cast<cl_device_partition_property>(count), + 0 + }; + + return partition(properties); + } + + /// \opencl_version_warning{1,2} + std::vector<device> + partition_by_counts(const std::vector<size_t> &counts) const + { + std::vector<cl_device_partition_property> properties; + + 
properties.push_back(CL_DEVICE_PARTITION_BY_COUNTS); + for(size_t i = 0; i < counts.size(); i++){ + properties.push_back( + static_cast<cl_device_partition_property>(counts[i])); + } + properties.push_back(CL_DEVICE_PARTITION_BY_COUNTS_LIST_END); + properties.push_back(0); + + return partition(&properties[0]); + } + + /// \opencl_version_warning{1,2} + std::vector<device> + partition_by_affinity_domain(cl_device_affinity_domain domain) const + { + cl_device_partition_property properties[] = { + CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, + static_cast<cl_device_partition_property>(domain), + 0 + }; + + return partition(properties); + } + #endif // CL_VERSION_1_2 + + /// Returns \c true if the device is the same at \p other. + bool operator==(const device &other) const + { + return m_id == other.m_id; + } + + /// Returns \c true if the device is different from \p other. + bool operator!=(const device &other) const + { + return m_id != other.m_id; + } + + /// \internal_ + bool check_version(int major, int minor) const + { + std::stringstream stream; + stream << version(); + + int actual_major, actual_minor; + stream.ignore(7); // 'OpenCL ' + stream >> actual_major; + stream.ignore(1); // '.' 
+ stream >> actual_minor; + + return actual_major > major || + (actual_major == major && actual_minor >= minor); + } + +private: + cl_device_id m_id; +}; + +/// \internal_ +template<> +inline uint_ device::preferred_vector_width<short_>() const +{ + return get_info<uint_>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT); +} + +/// \internal_ +template<> +inline uint_ device::preferred_vector_width<int_>() const +{ + return get_info<uint_>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT); +} + +/// \internal_ +template<> +inline uint_ device::preferred_vector_width<long_>() const +{ + return get_info<uint_>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG); +} + +/// \internal_ +template<> +inline uint_ device::preferred_vector_width<float_>() const +{ + return get_info<uint_>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT); +} + +/// \internal_ +template<> +inline uint_ device::preferred_vector_width<double_>() const +{ + return get_info<uint_>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE); +} + +/// \internal_ define get_info() specializations for device +BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device, + ((cl_uint, CL_DEVICE_ADDRESS_BITS)) + ((bool, CL_DEVICE_AVAILABLE)) + ((bool, CL_DEVICE_COMPILER_AVAILABLE)) + ((bool, CL_DEVICE_ENDIAN_LITTLE)) + ((bool, CL_DEVICE_ERROR_CORRECTION_SUPPORT)) + ((cl_device_exec_capabilities, CL_DEVICE_EXECUTION_CAPABILITIES)) + ((std::string, CL_DEVICE_EXTENSIONS)) + ((cl_ulong, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE)) + ((cl_device_mem_cache_type, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE)) + ((cl_ulong, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE)) + ((cl_ulong, CL_DEVICE_GLOBAL_MEM_SIZE)) + ((bool, CL_DEVICE_IMAGE_SUPPORT)) + ((size_t, CL_DEVICE_IMAGE2D_MAX_HEIGHT)) + ((size_t, CL_DEVICE_IMAGE2D_MAX_WIDTH)) + ((size_t, CL_DEVICE_IMAGE3D_MAX_DEPTH)) + ((size_t, CL_DEVICE_IMAGE3D_MAX_HEIGHT)) + ((size_t, CL_DEVICE_IMAGE3D_MAX_WIDTH)) + ((cl_ulong, CL_DEVICE_LOCAL_MEM_SIZE)) + ((cl_device_local_mem_type, CL_DEVICE_LOCAL_MEM_TYPE)) + ((cl_uint, CL_DEVICE_MAX_CLOCK_FREQUENCY)) + 
((cl_uint, CL_DEVICE_MAX_COMPUTE_UNITS)) + ((cl_uint, CL_DEVICE_MAX_CONSTANT_ARGS)) + ((cl_ulong, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE)) + ((cl_ulong, CL_DEVICE_MAX_MEM_ALLOC_SIZE)) + ((size_t, CL_DEVICE_MAX_PARAMETER_SIZE)) + ((cl_uint, CL_DEVICE_MAX_READ_IMAGE_ARGS)) + ((cl_uint, CL_DEVICE_MAX_SAMPLERS)) + ((size_t, CL_DEVICE_MAX_WORK_GROUP_SIZE)) + ((cl_uint, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS)) + ((std::vector<size_t>, CL_DEVICE_MAX_WORK_ITEM_SIZES)) + ((cl_uint, CL_DEVICE_MAX_WRITE_IMAGE_ARGS)) + ((cl_uint, CL_DEVICE_MEM_BASE_ADDR_ALIGN)) + ((cl_uint, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE)) + ((std::string, CL_DEVICE_NAME)) + ((cl_platform_id, CL_DEVICE_PLATFORM)) + ((cl_uint, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR)) + ((cl_uint, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT)) + ((cl_uint, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT)) + ((cl_uint, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG)) + ((cl_uint, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT)) + ((cl_uint, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE)) + ((std::string, CL_DEVICE_PROFILE)) + ((size_t, CL_DEVICE_PROFILING_TIMER_RESOLUTION)) + ((cl_command_queue_properties, CL_DEVICE_QUEUE_PROPERTIES)) + ((cl_device_fp_config, CL_DEVICE_SINGLE_FP_CONFIG)) + ((cl_device_type, CL_DEVICE_TYPE)) + ((std::string, CL_DEVICE_VENDOR)) + ((cl_uint, CL_DEVICE_VENDOR_ID)) + ((std::string, CL_DEVICE_VERSION)) + ((std::string, CL_DRIVER_VERSION)) +) + +#ifdef CL_DEVICE_DOUBLE_FP_CONFIG +BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device, + ((cl_device_fp_config, CL_DEVICE_DOUBLE_FP_CONFIG)) +) +#endif + +#ifdef CL_DEVICE_HALF_FP_CONFIG +BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device, + ((cl_device_fp_config, CL_DEVICE_HALF_FP_CONFIG)) +) +#endif + +#ifdef CL_VERSION_1_1 +BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device, + ((bool, CL_DEVICE_HOST_UNIFIED_MEMORY)) + ((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR)) + ((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT)) + ((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT)) + ((cl_uint, 
CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG)) + ((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT)) + ((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE)) + ((std::string, CL_DEVICE_OPENCL_C_VERSION)) +) +#endif // CL_VERSION_1_1 + +#ifdef CL_VERSION_1_2 +BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device, + ((std::string, CL_DEVICE_BUILT_IN_KERNELS)) + ((bool, CL_DEVICE_LINKER_AVAILABLE)) + ((cl_device_id, CL_DEVICE_PARENT_DEVICE)) + ((cl_uint, CL_DEVICE_PARTITION_MAX_SUB_DEVICES)) + ((cl_device_partition_property, CL_DEVICE_PARTITION_PROPERTIES)) + ((cl_device_affinity_domain, CL_DEVICE_PARTITION_AFFINITY_DOMAIN)) + ((cl_device_partition_property, CL_DEVICE_PARTITION_TYPE)) + ((size_t, CL_DEVICE_PRINTF_BUFFER_SIZE)) + ((bool, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC)) + ((cl_uint, CL_DEVICE_REFERENCE_COUNT)) +) +#endif // CL_VERSION_1_2 + +#ifdef CL_VERSION_2_0 +BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device, + ((size_t, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE)) + ((size_t, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE)) + ((cl_uint, CL_DEVICE_MAX_ON_DEVICE_EVENTS)) + ((cl_uint, CL_DEVICE_MAX_ON_DEVICE_QUEUES)) + ((cl_uint, CL_DEVICE_MAX_PIPE_ARGS)) + ((cl_uint, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS)) + ((cl_uint, CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS)) + ((cl_uint, CL_DEVICE_PIPE_MAX_PACKET_SIZE)) + ((cl_uint, CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT)) + ((cl_uint, CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT)) + ((cl_uint, CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT)) + ((cl_uint, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE)) + ((cl_uint, CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE)) + ((cl_command_queue_properties, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES)) + ((cl_device_svm_capabilities, CL_DEVICE_SVM_CAPABILITIES)) + ((cl_uint, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT)) + ((cl_uint, CL_DEVICE_IMAGE_PITCH_ALIGNMENT)) +) +#endif // CL_VERSION_2_0 + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_DEVICE_HPP diff --git a/boost/compute/event.hpp 
b/boost/compute/event.hpp new file mode 100644 index 0000000000..2f53d87650 --- /dev/null +++ b/boost/compute/event.hpp @@ -0,0 +1,338 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_EVENT_HPP +#define BOOST_COMPUTE_EVENT_HPP + +#include <boost/function.hpp> + +#include <boost/compute/config.hpp> +#include <boost/compute/exception.hpp> +#include <boost/compute/detail/duration.hpp> +#include <boost/compute/detail/get_object_info.hpp> +#include <boost/compute/detail/assert_cl_success.hpp> +#include <boost/compute/types/fundamental.hpp> + +namespace boost { +namespace compute { + +/// \class event +/// \brief An event corresponding to an operation on a compute device +/// +/// Event objects are used to track operations running on the device (such as +/// kernel executions and memory transfers). Event objects are returned by the +/// various \c enqueue_* methods of the command_queue class. +/// +/// Events can be used to synchronize operations between the host and the +/// device. The \c wait() method will block execution on the host until the +/// operation corresponding to the event on the device has completed. The +/// status of the operation can also be polled with the \c status() method. +/// +/// Event objects can also be used for performance profiling. In order to use +/// events for profiling, the command queue must be constructed with the +/// \c CL_QUEUE_PROFILING_ENABLE flag. 
Then the \c duration() method can be +/// used to retrieve the total duration of the operation on the device: +/// \code +/// std::cout << "time = " << e.duration<std::chrono::milliseconds>().count() << "ms\n"; +/// \endcode +/// +/// \see \ref future "future<T>", wait_list +class event +{ +public: + /// \internal_ + enum execution_status { + complete = CL_COMPLETE, + running = CL_RUNNING, + submitted = CL_SUBMITTED, + queued = CL_QUEUED + }; + + /// \internal_ + enum command_type { + ndrange_kernel = CL_COMMAND_NDRANGE_KERNEL, + task = CL_COMMAND_TASK, + native_kernel = CL_COMMAND_NATIVE_KERNEL, + read_buffer = CL_COMMAND_READ_BUFFER, + write_buffer = CL_COMMAND_WRITE_BUFFER, + copy_buffer = CL_COMMAND_COPY_BUFFER, + read_image = CL_COMMAND_READ_IMAGE, + write_image = CL_COMMAND_WRITE_IMAGE, + copy_image = CL_COMMAND_COPY_IMAGE, + copy_image_to_buffer = CL_COMMAND_COPY_IMAGE_TO_BUFFER, + copy_buffer_to_image = CL_COMMAND_COPY_BUFFER_TO_IMAGE, + map_buffer = CL_COMMAND_MAP_BUFFER, + map_image = CL_COMMAND_MAP_IMAGE, + unmap_mem_object = CL_COMMAND_UNMAP_MEM_OBJECT, + marker = CL_COMMAND_MARKER, + aquire_gl_objects = CL_COMMAND_ACQUIRE_GL_OBJECTS, + release_gl_object = CL_COMMAND_RELEASE_GL_OBJECTS + #if defined(CL_VERSION_1_1) + , + read_buffer_rect = CL_COMMAND_READ_BUFFER_RECT, + write_buffer_rect = CL_COMMAND_WRITE_BUFFER_RECT, + copy_buffer_rect = CL_COMMAND_COPY_BUFFER_RECT + #endif + }; + + /// \internal_ + enum profiling_info { + profiling_command_queued = CL_PROFILING_COMMAND_QUEUED, + profiling_command_submit = CL_PROFILING_COMMAND_SUBMIT, + profiling_command_start = CL_PROFILING_COMMAND_START, + profiling_command_end = CL_PROFILING_COMMAND_END + }; + + /// Creates a null event object. + event() + : m_event(0) + { + } + + explicit event(cl_event event, bool retain = true) + : m_event(event) + { + if(m_event && retain){ + clRetainEvent(event); + } + } + + /// Makes a new event as a copy of \p other. 
+ event(const event &other) + : m_event(other.m_event) + { + if(m_event){ + clRetainEvent(m_event); + } + } + + /// Copies the event object from \p other to \c *this. + event& operator=(const event &other) + { + if(this != &other){ + if(m_event){ + clReleaseEvent(m_event); + } + + m_event = other.m_event; + + if(m_event){ + clRetainEvent(m_event); + } + } + + return *this; + } + + #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES + /// Move-constructs a new event object from \p other. + event(event&& other) BOOST_NOEXCEPT + : m_event(other.m_event) + { + other.m_event = 0; + } + + /// Move-assigns the event from \p other to \c *this. + event& operator=(event&& other) BOOST_NOEXCEPT + { + if(m_event){ + clReleaseEvent(m_event); + } + + m_event = other.m_event; + other.m_event = 0; + + return *this; + } + #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES + + /// Destroys the event object. + ~event() + { + if(m_event){ + BOOST_COMPUTE_ASSERT_CL_SUCCESS( + clReleaseEvent(m_event) + ); + } + } + + /// Returns a reference to the underlying OpenCL event object. + cl_event& get() const + { + return const_cast<cl_event &>(m_event); + } + + /// Returns the status of the event. + cl_int status() const + { + return get_info<cl_int>(CL_EVENT_COMMAND_EXECUTION_STATUS); + } + + /// Returns the command type for the event. + cl_command_type get_command_type() const + { + return get_info<cl_command_type>(CL_EVENT_COMMAND_TYPE); + } + + /// Returns information about the event. + /// + /// \see_opencl_ref{clGetEventInfo} + template<class T> + T get_info(cl_event_info info) const + { + return detail::get_object_info<T>(clGetEventInfo, m_event, info); + } + + /// \overload + template<int Enum> + typename detail::get_object_info_type<event, Enum>::type + get_info() const; + + /// Returns profiling information for the event. 
+ /// + /// \see event::duration() + /// + /// \see_opencl_ref{clGetEventProfilingInfo} + template<class T> + T get_profiling_info(cl_profiling_info info) const + { + return detail::get_object_info<T>(clGetEventProfilingInfo, + m_event, + info); + } + + /// Blocks until the actions corresponding to the event have + /// completed. + void wait() const + { + cl_int ret = clWaitForEvents(1, &m_event); + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + } + + #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + /// Registers a function to be called when the event status changes to + /// \p status (by default CL_COMPLETE). The callback is passed the OpenCL + /// event object, the event status, and a pointer to arbitrary user data. + /// + /// \see_opencl_ref{clSetEventCallback} + /// + /// \opencl_version_warning{1,1} + void set_callback(void (BOOST_COMPUTE_CL_CALLBACK *callback)( + cl_event event, cl_int status, void *user_data + ), + cl_int status = CL_COMPLETE, + void *user_data = 0) + { + cl_int ret = clSetEventCallback(m_event, status, callback, user_data); + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + } + + /// Registers a generic function to be called when the event status + /// changes to \p status (by default \c CL_COMPLETE). + /// + /// The function specified by \p callback must be invokable with zero + /// arguments (e.g. \c callback()). + /// + /// \opencl_version_warning{1,1} + template<class Function> + void set_callback(Function callback, cl_int status = CL_COMPLETE) + { + set_callback( + event_callback_invoker, + status, + new boost::function<void()>(callback) + ); + } + #endif // CL_VERSION_1_1 + + /// Returns the total duration of the event from \p start to \p end. 
+ /// + /// For example, to print the number of milliseconds the event took to + /// execute: + /// \code + /// std::cout << event.duration<std::chrono::milliseconds>().count() << " ms" << std::endl; + /// \endcode + /// + /// \see event::get_profiling_info() + template<class Duration> + Duration duration(cl_profiling_info start = CL_PROFILING_COMMAND_START, + cl_profiling_info end = CL_PROFILING_COMMAND_END) const + { + const ulong_ nanoseconds = + get_profiling_info<ulong_>(end) - get_profiling_info<ulong_>(start); + + return detail::make_duration_from_nanoseconds(Duration(), nanoseconds); + } + + /// Returns \c true if the event is the same as \p other. + bool operator==(const event &other) const + { + return m_event == other.m_event; + } + + /// Returns \c true if the event is different from \p other. + bool operator!=(const event &other) const + { + return m_event != other.m_event; + } + + /// \internal_ + operator cl_event() const + { + return m_event; + } + + /// \internal_ (deprecated) + cl_int get_status() const + { + return status(); + } + +private: + #ifdef CL_VERSION_1_1 + /// \internal_ + static void BOOST_COMPUTE_CL_CALLBACK + event_callback_invoker(cl_event, cl_int, void *user_data) + { + boost::function<void()> *callback = + static_cast<boost::function<void()> *>(user_data); + + (*callback)(); + + delete callback; + } + #endif // CL_VERSION_1_1 + +protected: + cl_event m_event; +}; + +/// \internal_ define get_info() specializations for event +BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(event, + ((cl_command_queue, CL_EVENT_COMMAND_QUEUE)) + ((cl_command_type, CL_EVENT_COMMAND_TYPE)) + ((cl_int, CL_EVENT_COMMAND_EXECUTION_STATUS)) + ((cl_uint, CL_EVENT_REFERENCE_COUNT)) +) + +#ifdef CL_VERSION_1_1 +BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(event, + ((cl_context, CL_EVENT_CONTEXT)) +) +#endif + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_EVENT_HPP diff --git a/boost/compute/exception.hpp 
b/boost/compute/exception.hpp new file mode 100644 index 0000000000..10a271de03 --- /dev/null +++ b/boost/compute/exception.hpp @@ -0,0 +1,23 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_EXCEPTION_HPP +#define BOOST_COMPUTE_EXCEPTION_HPP + +/// \file +/// +/// Meta-header to include all Boost.Compute exception headers. + +#include <boost/compute/exception/context_error.hpp> +#include <boost/compute/exception/no_device_found.hpp> +#include <boost/compute/exception/opencl_error.hpp> +#include <boost/compute/exception/unsupported_extension_error.hpp> + +#endif // BOOST_COMPUTE_EXCEPTION_HPP diff --git a/boost/compute/exception/context_error.hpp b/boost/compute/exception/context_error.hpp new file mode 100644 index 0000000000..eeb387d884 --- /dev/null +++ b/boost/compute/exception/context_error.hpp @@ -0,0 +1,88 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_EXCEPTION_CONTEXT_ERROR_HPP +#define BOOST_COMPUTE_EXCEPTION_CONTEXT_ERROR_HPP + +#include <exception> + +namespace boost { +namespace compute { + +class context; + +/// \class context_error +/// \brief A run-time OpenCL context error. 
+/// +/// The context_error exception is thrown when the OpenCL context encounters +/// an error condition. Boost.Compute is notified of these error conditions by +/// registering an error handler when creating context objects (via the +/// \c pfn_notify argument to the \c clCreateContext() function). +/// +/// This exception is different than the opencl_error exception which is thrown +/// as a result of error caused when calling a single OpenCL API function. +/// +/// \see opencl_error +class context_error : public std::exception +{ +public: + /// Creates a new context error exception object. + context_error(const context *context, + const char *errinfo, + const void *private_info, + size_t private_info_size) throw() + : m_context(context), + m_errinfo(errinfo), + m_private_info(private_info), + m_private_info_size(private_info_size) + { + } + + /// Destroys the context error object. + ~context_error() throw() + { + } + + /// Returns a string with a description of the error. + const char* what() const throw() + { + return m_errinfo; + } + + /// Returns a pointer to the context object which generated the error + /// notification. + const context* get_context_ptr() const throw() + { + return m_context; + } + + /// Returns a pointer to the private info memory block. + const void* get_private_info_ptr() const throw() + { + return m_private_info; + } + + /// Returns the size of the private info memory block. 
+ const size_t get_private_info_size() const throw() + { + return m_private_info_size; + } + +private: + const context *m_context; + const char *m_errinfo; + const void *m_private_info; + size_t m_private_info_size; +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_EXCEPTION_CONTEXT_ERROR_HPP diff --git a/boost/compute/exception/no_device_found.hpp b/boost/compute/exception/no_device_found.hpp new file mode 100644 index 0000000000..fb73942e0c --- /dev/null +++ b/boost/compute/exception/no_device_found.hpp @@ -0,0 +1,48 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_EXCEPTION_NO_DEVICE_FOUND_HPP +#define BOOST_COMPUTE_EXCEPTION_NO_DEVICE_FOUND_HPP + +#include <exception> + +namespace boost { +namespace compute { + +/// \class no_device_found +/// \brief Exception thrown when no OpenCL device is found +/// +/// This exception is thrown when no valid OpenCL device can be found. +/// +/// \see opencl_error +class no_device_found : public std::exception +{ +public: + /// Creates a new no_device_found exception object. + no_device_found() throw() + { + } + + /// Destroys the no_device_found exception object. + ~no_device_found() throw() + { + } + + /// Returns a string containing a human-readable error message. 
+ const char* what() const throw() + { + return "No OpenCL device found"; + } +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_EXCEPTION_NO_DEVICE_FOUND_HPP diff --git a/boost/compute/exception/opencl_error.hpp b/boost/compute/exception/opencl_error.hpp new file mode 100644 index 0000000000..29a3a9d258 --- /dev/null +++ b/boost/compute/exception/opencl_error.hpp @@ -0,0 +1,158 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_EXCEPTION_OPENCL_ERROR_HPP +#define BOOST_COMPUTE_EXCEPTION_OPENCL_ERROR_HPP + +#include <exception> +#include <string> +#include <sstream> + +#include <boost/compute/cl.hpp> + +namespace boost { +namespace compute { + +/// \class opencl_error +/// \brief A run-time OpenCL error. +/// +/// The opencl_error class represents an error returned from an OpenCL +/// function. +/// +/// \see context_error +class opencl_error : public std::exception +{ +public: + /// Creates a new opencl_error exception object for \p error. + explicit opencl_error(cl_int error) throw() + : m_error(error), + m_error_string(to_string(error)) + { + } + + /// Destroys the opencl_error object. + ~opencl_error() throw() + { + } + + /// Returns the numeric error code. + cl_int error_code() const throw() + { + return m_error; + } + + /// Returns a string description of the error. + std::string error_string() const throw() + { + return m_error_string; + } + + /// Returns a C-string description of the error. 
+ const char* what() const throw() + { + return m_error_string.c_str(); + } + + /// Static function which converts the numeric OpenCL error code \p error + /// to a human-readable string. + /// + /// For example: + /// \code + /// std::cout << opencl_error::to_string(CL_INVALID_KERNEL_ARGS) << std::endl; + /// \endcode + /// + /// Will print "Invalid Kernel Arguments". + /// + /// If the error code is unknown (e.g. not a valid OpenCL error), a string + /// containing "Unknown OpenCL Error" along with the error number will be + /// returned. + static std::string to_string(cl_int error) + { + switch(error){ + case CL_SUCCESS: return "Success"; + case CL_DEVICE_NOT_FOUND: return "Device Not Found"; + case CL_DEVICE_NOT_AVAILABLE: return "Device Not Available"; + case CL_COMPILER_NOT_AVAILABLE: return "Compiler Not Available"; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "Memory Object Allocation Failure"; + case CL_OUT_OF_RESOURCES: return "Out of Resources"; + case CL_OUT_OF_HOST_MEMORY: return "Out of Host Memory"; + case CL_PROFILING_INFO_NOT_AVAILABLE: return "Profiling Information Not Available"; + case CL_MEM_COPY_OVERLAP: return "Memory Copy Overlap"; + case CL_IMAGE_FORMAT_MISMATCH: return "Image Format Mismatch"; + case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "Image Format Not Supported"; + case CL_BUILD_PROGRAM_FAILURE: return "Build Program Failure"; + case CL_MAP_FAILURE: return "Map Failure"; + case CL_INVALID_VALUE: return "Invalid Value"; + case CL_INVALID_DEVICE_TYPE: return "Invalid Device Type"; + case CL_INVALID_PLATFORM: return "Invalid Platform"; + case CL_INVALID_DEVICE: return "Invalid Device"; + case CL_INVALID_CONTEXT: return "Invalid Context"; + case CL_INVALID_QUEUE_PROPERTIES: return "Invalid Queue Properties"; + case CL_INVALID_COMMAND_QUEUE: return "Invalid Command Queue"; + case CL_INVALID_HOST_PTR: return "Invalid Host Pointer"; + case CL_INVALID_MEM_OBJECT: return "Invalid Memory Object"; + case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: 
return "Invalid Image Format Descriptor"; + case CL_INVALID_IMAGE_SIZE: return "Invalid Image Size"; + case CL_INVALID_SAMPLER: return "Invalid Sampler"; + case CL_INVALID_BINARY: return "Invalid Binary"; + case CL_INVALID_BUILD_OPTIONS: return "Invalid Build Options"; + case CL_INVALID_PROGRAM: return "Invalid Program"; + case CL_INVALID_PROGRAM_EXECUTABLE: return "Invalid Program Executable"; + case CL_INVALID_KERNEL_NAME: return "Invalid Kernel Name"; + case CL_INVALID_KERNEL_DEFINITION: return "Invalid Kernel Definition"; + case CL_INVALID_KERNEL: return "Invalid Kernel"; + case CL_INVALID_ARG_INDEX: return "Invalid Argument Index"; + case CL_INVALID_ARG_VALUE: return "Invalid Argument Value"; + case CL_INVALID_ARG_SIZE: return "Invalid Argument Size"; + case CL_INVALID_KERNEL_ARGS: return "Invalid Kernel Arguments"; + case CL_INVALID_WORK_DIMENSION: return "Invalid Work Dimension"; + case CL_INVALID_WORK_GROUP_SIZE: return "Invalid Work Group Size"; + case CL_INVALID_WORK_ITEM_SIZE: return "Invalid Work Item Size"; + case CL_INVALID_GLOBAL_OFFSET: return "Invalid Global Offset"; + case CL_INVALID_EVENT_WAIT_LIST: return "Invalid Event Wait List"; + case CL_INVALID_EVENT: return "Invalid Event"; + case CL_INVALID_OPERATION: return "Invalid Operation"; + case CL_INVALID_GL_OBJECT: return "Invalid GL Object"; + case CL_INVALID_BUFFER_SIZE: return "Invalid Buffer Size"; + case CL_INVALID_MIP_LEVEL: return "Invalid MIP Level"; + case CL_INVALID_GLOBAL_WORK_SIZE: return "Invalid Global Work Size"; + #ifdef CL_VERSION_1_2 + case CL_COMPILE_PROGRAM_FAILURE: return "Compile Program Failure"; + case CL_LINKER_NOT_AVAILABLE: return "Linker Not Available"; + case CL_LINK_PROGRAM_FAILURE: return "Link Program Failure"; + case CL_DEVICE_PARTITION_FAILED: return "Device Partition Failed"; + case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: return "Kernel Argument Info Not Available"; + case CL_INVALID_PROPERTY: return "Invalid Property"; + case CL_INVALID_IMAGE_DESCRIPTOR: return 
"Invalid Image Descriptor"; + case CL_INVALID_COMPILER_OPTIONS: return "Invalid Compiler Options"; + case CL_INVALID_LINKER_OPTIONS: return "Invalid Linker Options"; + case CL_INVALID_DEVICE_PARTITION_COUNT: return "Invalid Device Partition Count"; + #endif // CL_VERSION_1_2 + #ifdef CL_VERSION_2_0 + case CL_INVALID_PIPE_SIZE: return "Invalid Pipe Size"; + case CL_INVALID_DEVICE_QUEUE: return "Invalid Device Queue"; + #endif + default: { + std::stringstream s; + s << "Unknown OpenCL Error (" << error << ")"; + return s.str(); + } + } + } + +private: + cl_int m_error; + std::string m_error_string; +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_EXCEPTION_OPENCL_ERROR_HPP diff --git a/boost/compute/exception/unsupported_extension_error.hpp b/boost/compute/exception/unsupported_extension_error.hpp new file mode 100644 index 0000000000..c6f4de6c33 --- /dev/null +++ b/boost/compute/exception/unsupported_extension_error.hpp @@ -0,0 +1,71 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_EXCEPTION_UNSUPPORTED_EXTENSION_ERROR_HPP +#define BOOST_COMPUTE_EXCEPTION_UNSUPPORTED_EXTENSION_ERROR_HPP + +#include <exception> +#include <sstream> +#include <string> + +namespace boost { +namespace compute { + +/// \class unsupported_extension_error +/// \brief Exception thrown when attempting to use an unsupported +/// OpenCL extension. +/// +/// This exception is thrown when the user attempts to use an OpenCL +/// extension which is not supported on the platform and/or device. 
+/// +/// An example of this is attempting to use CL-GL sharing on a non-GPU +/// device. +/// +/// \see opencl_error +class unsupported_extension_error : public std::exception +{ +public: + /// Creates a new unsupported extension error exception object indicating + /// that \p extension is not supported by the OpenCL platform or device. + explicit unsupported_extension_error(const char *extension) throw() + : m_extension(extension) + { + std::stringstream msg; + msg << "OpenCL extension " << extension << " not supported"; + m_error_string = msg.str(); + } + + /// Destroys the unsupported extension error object. + ~unsupported_extension_error() throw() + { + } + + /// Returns the name of the unsupported extension. + std::string extension_name() const throw() + { + return m_extension; + } + + /// Returns a string containing a human-readable error message containing + /// the name of the unsupported exception. + const char* what() const throw() + { + return m_error_string.c_str(); + } + +private: + std::string m_extension; + std::string m_error_string; +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_EXCEPTION_UNSUPPORTED_EXTENSION_ERROR_HPP diff --git a/boost/compute/experimental/clamp_range.hpp b/boost/compute/experimental/clamp_range.hpp new file mode 100644 index 0000000000..0c2260498f --- /dev/null +++ b/boost/compute/experimental/clamp_range.hpp @@ -0,0 +1,49 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_EXPERIMENTAL_CLAMP_RANGE_HPP +#define BOOST_COMPUTE_EXPERIMENTAL_CLAMP_RANGE_HPP + +#include <iterator> + +#include <boost/compute/lambda.hpp> +#include <boost/compute/algorithm/transform.hpp> + +namespace boost { +namespace compute { +namespace experimental { + +template<class InputIterator, class OutputIterator> +inline OutputIterator +clamp_range(InputIterator first, + InputIterator last, + OutputIterator result, + typename std::iterator_traits<InputIterator>::value_type lo, + typename std::iterator_traits<InputIterator>::value_type hi, + command_queue &queue) +{ + using ::boost::compute::lambda::_1; + using ::boost::compute::lambda::_2; + using ::boost::compute::lambda::clamp; + + return ::boost::compute::transform( + first, + last, + result, + clamp(_1, lo, hi), + queue + ); +} + +} // end experimental namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_EXPERIMENTAL_CLAMP_RANGE_HPP diff --git a/boost/compute/experimental/malloc.hpp b/boost/compute/experimental/malloc.hpp new file mode 100644 index 0000000000..ad96888743 --- /dev/null +++ b/boost/compute/experimental/malloc.hpp @@ -0,0 +1,51 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_EXPERIMENTAL_MALLOC_HPP +#define BOOST_COMPUTE_EXPERIMENTAL_MALLOC_HPP + +#include <boost/compute/buffer.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/context.hpp> +#include <boost/compute/detail/device_ptr.hpp> + +namespace boost { +namespace compute { +namespace experimental { + +// bring device_ptr into the experimental namespace +using detail::device_ptr; + +template<class T> +inline device_ptr<T> +malloc(std::size_t size, const context &context = system::default_context()) +{ + buffer buf(context, size * sizeof(T)); + clRetainMemObject(buf.get()); + return device_ptr<T>(buf); +} + +inline device_ptr<char> +malloc(std::size_t size, const context &context = system::default_context()) +{ + return malloc<char>(size, context); +} + +template<class T> +inline void free(device_ptr<T> &ptr) +{ + clReleaseMemObject(ptr.get_buffer().get()); +} + +} // end experimental namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_EXPERIMENTAL_MALLOC_HPP diff --git a/boost/compute/experimental/sort_by_transform.hpp b/boost/compute/experimental/sort_by_transform.hpp new file mode 100644 index 0000000000..3d84ba9810 --- /dev/null +++ b/boost/compute/experimental/sort_by_transform.hpp @@ -0,0 +1,66 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_EXPERIMENTAL_SORT_BY_TRANSFORM_HPP +#define BOOST_COMPUTE_EXPERIMENTAL_SORT_BY_TRANSFORM_HPP + +#include <iterator> + +#include <boost/compute/algorithm/sort_by_key.hpp> +#include <boost/compute/algorithm/transform.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/type_traits/result_of.hpp> + +namespace boost { +namespace compute { +namespace experimental { + +template<class Iterator, class Transform, class Compare> +inline void sort_by_transform(Iterator first, + Iterator last, + Transform transform, + Compare compare, + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<Iterator>::value_type value_type; + typedef typename boost::compute::result_of<Transform(value_type)>::type key_type; + + size_t n = detail::iterator_range_size(first, last); + if(n < 2){ + return; + } + + const context &context = queue.get_context(); + + ::boost::compute::vector<key_type> keys(n, context); + + ::boost::compute::transform( + first, + last, + keys.begin(), + transform, + queue + ); + + ::boost::compute::sort_by_key( + keys.begin(), + keys.end(), + first, + compare, + queue + ); +} + +} // end experimental namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_EXPERIMENTAL_SORT_BY_TRANSFORM_HPP diff --git a/boost/compute/experimental/tabulate.hpp b/boost/compute/experimental/tabulate.hpp new file mode 100644 index 0000000000..4f607e7961 --- /dev/null +++ b/boost/compute/experimental/tabulate.hpp @@ -0,0 +1,44 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See 
http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_EXPERIMENTAL_TABULATE_HPP +#define BOOST_COMPUTE_EXPERIMENTAL_TABULATE_HPP + +#include <iterator> + +#include <boost/compute/algorithm/transform.hpp> +#include <boost/compute/iterator/counting_iterator.hpp> + +namespace boost { +namespace compute { +namespace experimental { + +template<class Iterator, class UnaryFunction> +inline void tabulate(Iterator first, + Iterator last, + UnaryFunction function, + command_queue &queue) +{ + size_t n = detail::iterator_range_size(first, last); + + ::boost::compute::transform( + ::boost::compute::make_counting_iterator<int>(0), + ::boost::compute::make_counting_iterator<int>(n), + first, + function, + queue + ); +} + +} // end experimental namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_EXPERIMENTAL_TABULATE_HPP diff --git a/boost/compute/function.hpp b/boost/compute/function.hpp new file mode 100644 index 0000000000..e83f16808a --- /dev/null +++ b/boost/compute/function.hpp @@ -0,0 +1,454 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_FUNCTION_HPP +#define BOOST_COMPUTE_FUNCTION_HPP + +#include <map> +#include <string> +#include <sstream> +#include <vector> + +#include <boost/assert.hpp> +#include <boost/config.hpp> +#include <boost/function_types/parameter_types.hpp> +#include <boost/preprocessor/repetition.hpp> +#include <boost/mpl/for_each.hpp> +#include <boost/mpl/size.hpp> +#include <boost/mpl/transform.hpp> +#include <boost/static_assert.hpp> +#include <boost/tuple/tuple.hpp> +#include <boost/type_traits/add_pointer.hpp> +#include <boost/type_traits/function_traits.hpp> + +#include <boost/compute/cl.hpp> +#include <boost/compute/config.hpp> +#include <boost/compute/type_traits/type_name.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class ResultType, class ArgTuple> +class invoked_function +{ +public: + typedef ResultType result_type; + + BOOST_STATIC_CONSTANT( + size_t, arity = boost::tuples::length<ArgTuple>::value + ); + + invoked_function(const std::string &name, + const std::string &source) + : m_name(name), + m_source(source) + { + } + + invoked_function(const std::string &name, + const std::string &source, + const std::map<std::string, std::string> &definitions) + : m_name(name), + m_source(source), + m_definitions(definitions) + { + } + + invoked_function(const std::string &name, + const std::string &source, + const ArgTuple &args) + : m_name(name), + m_source(source), + m_args(args) + { + } + + invoked_function(const std::string &name, + const std::string &source, + const std::map<std::string, std::string> &definitions, + const ArgTuple &args) + : m_name(name), + m_source(source), + m_definitions(definitions), + m_args(args) + { + } + + std::string name() const + { + return m_name; + } + + std::string source() const + { + return m_source; + } + + const std::map<std::string, std::string>& definitions() const + { + return m_definitions; + } + + 
const ArgTuple& args() const + { + return m_args; + } + +private: + std::string m_name; + std::string m_source; + std::map<std::string, std::string> m_definitions; + ArgTuple m_args; +}; + +} // end detail namespace + +/// \class function +/// \brief A function object. +template<class Signature> +class function +{ +public: + /// \internal_ + typedef typename + boost::function_traits<Signature>::result_type result_type; + + /// \internal_ + BOOST_STATIC_CONSTANT( + size_t, arity = boost::function_traits<Signature>::arity + ); + + /// \internal_ + typedef Signature signature; + + /// Creates a new function object with \p name. + function(const std::string &name) + : m_name(name) + { + } + + /// Destroys the function object. + ~function() + { + } + + /// \internal_ + std::string name() const + { + return m_name; + } + + /// \internal_ + void set_source(const std::string &source) + { + m_source = source; + } + + /// \internal_ + std::string source() const + { + return m_source; + } + + /// \internal_ + void define(std::string name, std::string value = std::string()) + { + m_definitions[name] = value; + } + + /// \internal_ + detail::invoked_function<result_type, boost::tuple<> > + operator()() const + { + BOOST_STATIC_ASSERT_MSG( + arity == 0, + "Non-nullary function invoked with zero arguments" + ); + + return detail::invoked_function<result_type, boost::tuple<> >( + m_name, m_source, m_definitions + ); + } + + /// \internal_ + template<class Arg1> + detail::invoked_function<result_type, boost::tuple<Arg1> > + operator()(const Arg1 &arg1) const + { + BOOST_STATIC_ASSERT_MSG( + arity == 1, + "Non-unary function invoked one argument" + ); + + return detail::invoked_function<result_type, boost::tuple<Arg1> >( + m_name, m_source, m_definitions, boost::make_tuple(arg1) + ); + } + + /// \internal_ + template<class Arg1, class Arg2> + detail::invoked_function<result_type, boost::tuple<Arg1, Arg2> > + operator()(const Arg1 &arg1, const Arg2 &arg2) const + { + 
BOOST_STATIC_ASSERT_MSG( + arity == 2, + "Non-binary function invoked with two arguments" + ); + + return detail::invoked_function<result_type, boost::tuple<Arg1, Arg2> >( + m_name, m_source, m_definitions, boost::make_tuple(arg1, arg2) + ); + } + + /// \internal_ + template<class Arg1, class Arg2, class Arg3> + detail::invoked_function<result_type, boost::tuple<Arg1, Arg2, Arg3> > + operator()(const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) const + { + BOOST_STATIC_ASSERT_MSG( + arity == 3, + "Non-ternary function invoked with two arguments" + ); + + return detail::invoked_function<result_type, boost::tuple<Arg1, Arg2, Arg3> >( + m_name, m_source, m_definitions, boost::make_tuple(arg1, arg2, arg3) + ); + } + +private: + std::string m_name; + std::string m_source; + std::map<std::string, std::string> m_definitions; +}; + +/// Creates a function object given its \p name and \p source. +/// +/// \param name The function name. +/// \param source The function source code. +/// +/// \see BOOST_COMPUTE_FUNCTION() +template<class Signature> +inline function<Signature> +make_function_from_source(const std::string &name, const std::string &source) +{ + function<Signature> f(name); + f.set_source(source); + return f; +} + +namespace detail { + +// given a string containing the arguments declaration for a function +// like: "(int a, const float b)", returns a vector containing the name +// of each argument (e.g. ["a", "b"]). 
+inline std::vector<std::string> parse_argument_names(const char *arguments) +{ + BOOST_ASSERT_MSG( + arguments[0] == '(' && arguments[std::strlen(arguments)-1] == ')', + "Arguments should start and end with parentheses" + ); + + std::vector<std::string> args; + + size_t last_space = 0; + size_t skip_comma = 0; + for(size_t i = 1; i < std::strlen(arguments) - 2; i++){ + const char c = arguments[i]; + + if(c == ' '){ + last_space = i; + } + else if(c == ',' && !skip_comma){ + std::string name( + arguments + last_space + 1, i - last_space - 1 + ); + args.push_back(name); + } + else if(c == '<'){ + skip_comma++; + } + else if(c == '>'){ + skip_comma--; + } + } + + std::string last_argument( + arguments + last_space + 1, std::strlen(arguments) - last_space - 2 + ); + args.push_back(last_argument); + + return args; +} + +struct signature_argument_inserter +{ + signature_argument_inserter(std::stringstream &s_, const char *arguments, size_t last) + : s(s_) + { + n = 0; + m_last = last; + + m_argument_names = parse_argument_names(arguments); + + BOOST_ASSERT_MSG( + m_argument_names.size() == last, + "Wrong number of arguments" + ); + } + + template<class T> + void operator()(const T*) + { + s << type_name<T>() << " " << m_argument_names[n]; + if(n+1 < m_last){ + s << ", "; + } + n++; + } + + size_t n; + size_t m_last; + std::stringstream &s; + std::vector<std::string> m_argument_names; +}; + +template<class Signature> +inline std::string make_function_declaration(const char *name, const char *arguments) +{ + typedef typename + boost::function_traits<Signature>::result_type result_type; + typedef typename + boost::function_types::parameter_types<Signature>::type parameter_types; + typedef typename + mpl::size<parameter_types>::type arity_type; + + std::stringstream s; + s << "inline " << type_name<result_type>() << " " << name; + s << "("; + + if(arity_type::value > 0){ + signature_argument_inserter i(s, arguments, arity_type::value); + mpl::for_each< + typename 
mpl::transform<parameter_types, boost::add_pointer<mpl::_1> + >::type>(i); + } + + s << ")"; + return s.str(); +} + +struct argument_list_inserter +{ + argument_list_inserter(std::stringstream &s_, const char first, size_t last) + : s(s_) + { + n = 0; + m_last = last; + m_name = first; + } + + template<class T> + void operator()(const T*) + { + s << type_name<T>() << " " << m_name++; + if(n+1 < m_last){ + s << ", "; + } + n++; + } + + size_t n; + size_t m_last; + char m_name; + std::stringstream &s; +}; + +template<class Signature> +inline std::string generate_argument_list(const char first = 'a') +{ + typedef typename + boost::function_types::parameter_types<Signature>::type parameter_types; + typedef typename + mpl::size<parameter_types>::type arity_type; + + std::stringstream s; + s << '('; + + if(arity_type::value > 0){ + argument_list_inserter i(s, first, arity_type::value); + mpl::for_each< + typename mpl::transform<parameter_types, boost::add_pointer<mpl::_1> + >::type>(i); + } + + s << ')'; + return s.str(); +} + +// used by the BOOST_COMPUTE_FUNCTION() macro to create a function +// with the given signature, name, arguments, and source. +template<class Signature> +inline function<Signature> +make_function_impl(const char *name, const char *arguments, const char *source) +{ + std::stringstream s; + s << make_function_declaration<Signature>(name, arguments); + s << source; + + return make_function_from_source<Signature>(name, s.str()); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +/// Creates a function object with \p name and \p source. +/// +/// \param return_type The return type for the function. +/// \param name The name of the function. +/// \param arguments A list of arguments for the function. +/// \param source The OpenCL C source code for the function. +/// +/// The function declaration and signature are automatically created using +/// the \p return_type, \p name, and \p arguments macro parameters. 
+/// +/// The source code for the function is interpreted as OpenCL C99 source code +/// which is stringified and passed to the OpenCL compiler when the function +/// is invoked. +/// +/// For example, to create a function which squares a number: +/// \code +/// BOOST_COMPUTE_FUNCTION(float, square, (float x), +/// { +/// return x * x; +/// }); +/// \endcode +/// +/// And to create a function which sums two numbers: +/// \code +/// BOOST_COMPUTE_FUNCTION(int, sum_two, (int x, int y), +/// { +/// return x + y; +/// }); +/// \endcode +/// +/// \see BOOST_COMPUTE_CLOSURE() +#ifdef BOOST_COMPUTE_DOXYGEN_INVOKED +#define BOOST_COMPUTE_FUNCTION(return_type, name, arguments, source) +#else +#define BOOST_COMPUTE_FUNCTION(return_type, name, arguments, ...) \ + ::boost::compute::function<return_type arguments> name = \ + ::boost::compute::detail::make_function_impl<return_type arguments>( \ + #name, #arguments, #__VA_ARGS__ \ + ) +#endif + +#endif // BOOST_COMPUTE_FUNCTION_HPP diff --git a/boost/compute/functional.hpp b/boost/compute/functional.hpp new file mode 100644 index 0000000000..d2065216f4 --- /dev/null +++ b/boost/compute/functional.hpp @@ -0,0 +1,34 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_FUNCTIONAL_HPP +#define BOOST_COMPUTE_FUNCTIONAL_HPP + +/// \file +/// +/// Meta-header to include all Boost.Compute functional headers. 

#include <boost/compute/functional/as.hpp>
#include <boost/compute/functional/atomic.hpp>
#include <boost/compute/functional/common.hpp>
#include <boost/compute/functional/convert.hpp>
#include <boost/compute/functional/field.hpp>
#include <boost/compute/functional/geometry.hpp>
#include <boost/compute/functional/get.hpp>
#include <boost/compute/functional/hash.hpp>
#include <boost/compute/functional/identity.hpp>
#include <boost/compute/functional/integer.hpp>
#include <boost/compute/functional/logical.hpp>
#include <boost/compute/functional/math.hpp>
#include <boost/compute/functional/operator.hpp>
#include <boost/compute/functional/popcount.hpp>
#include <boost/compute/functional/relational.hpp>

#endif // BOOST_COMPUTE_FUNCTIONAL_HPP
diff --git a/boost/compute/functional/as.hpp b/boost/compute/functional/as.hpp new file mode 100644 index 0000000000..584bd0f38a --- /dev/null +++ b/boost/compute/functional/as.hpp @@ -0,0 +1,51 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_FUNCTIONAL_AS_HPP
#define BOOST_COMPUTE_FUNCTIONAL_AS_HPP

namespace boost {
namespace compute {
namespace detail {

// Expression object returned by as<T>::operator(). It stores a copy of the
// argument; the target type T appears only as a template parameter.
template<class T, class Arg>
struct invoked_as
{
    invoked_as(const Arg &arg)
        : m_arg(arg)
    {
    }

    Arg m_arg; // copy of the argument the functor was invoked with
};

} // end detail namespace

+/// The \ref as function converts its argument to type \c T (similar to +/// reinterpret_cast<T>).
+/// +/// \see \ref convert "convert<T>" +template<class T> +struct as +{ + typedef T result_type; + + /// \internal_ + template<class Arg> + detail::invoked_as<T, Arg> operator()(const Arg &arg) const + { + return detail::invoked_as<T, Arg>(arg); + } +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_FUNCTIONAL_AS_HPP diff --git a/boost/compute/functional/atomic.hpp b/boost/compute/functional/atomic.hpp new file mode 100644 index 0000000000..2701561bc3 --- /dev/null +++ b/boost/compute/functional/atomic.hpp @@ -0,0 +1,141 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_FUNCTIONAL_ATOMIC_HPP +#define BOOST_COMPUTE_FUNCTIONAL_ATOMIC_HPP + +#include <boost/compute/cl.hpp> +#include <boost/compute/function.hpp> + +#ifndef BOOST_COMPUTE_DOXYGEN_INVOKED +#ifdef CL_VERSION_1_1 + #define BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "atomic_" +#else + #define BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "atom_" +#endif +#endif // BOOST_COMPUTE_DOXYGEN_INVOKED + +namespace boost { +namespace compute { + +template<class T> +class atomic_add : public function<T (T*, T)> +{ +public: + atomic_add() + : function<T (T*, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "add") + { + } +}; + +template<class T> +class atomic_sub : public function<T (T*, T)> +{ +public: + atomic_sub() + : function<T (T*, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "sub") + { + } +}; + +template<class T> +class atomic_xchg : public function<T (T*, T)> +{ +public: + atomic_xchg() + : function<T (T*, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "xchg") + { + } +}; + +template<class T> 
+class atomic_inc : public function<T (T*)> +{ +public: + atomic_inc() + : function<T (T*)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "inc") + { + } +}; + +template<class T> +class atomic_dec : public function<T (T*)> +{ +public: + atomic_dec() + : function<T (T*)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "dec") + { + } +}; + +template<class T> +class atomic_cmpxchg : public function<T (T*, T, T)> +{ +public: + atomic_cmpxchg() + : function<T (T*, T, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "cmpxchg") + { + } +}; + +template<class T> +class atomic_max : public function<T (T*, T)> +{ +public: + atomic_max() + : function<T (T*, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "max") + { + } +}; + +template<class T> +class atomic_min : public function<T (T*, T)> +{ +public: + atomic_min() + : function<T (T*, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "min") + { + } +}; + +template<class T> +class atomic_and : public function<T (T*, T)> +{ +public: + atomic_and() + : function<T (T*, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "and") + { + } +}; + +template<class T> +class atomic_or : public function<T (T*, T)> +{ +public: + atomic_or() + : function<T (T*, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "or") + { + } +}; + +template<class T> +class atomic_xor : public function<T (T*, T)> +{ +public: + atomic_xor() + : function<T (T*, T)>(BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "xor") + { + } +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_FUNCTIONAL_ATOMIC_HPP diff --git a/boost/compute/functional/bind.hpp b/boost/compute/functional/bind.hpp new file mode 100644 index 0000000000..0c5929f3b9 --- /dev/null +++ b/boost/compute/functional/bind.hpp @@ -0,0 +1,261 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See 
http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------//

#ifndef BOOST_COMPUTE_FUNCTIONAL_BIND_HPP
#define BOOST_COMPUTE_FUNCTIONAL_BIND_HPP

#include <boost/mpl/int.hpp>
#include <boost/tuple/tuple.hpp>
#include <boost/type_traits/conditional.hpp>

#include <boost/compute/config.hpp>
#include <boost/compute/detail/meta_kernel.hpp>

namespace boost {
namespace compute {
namespace placeholders {

/// \internal_
/// Placeholder tag; its integral value \c I is the zero-based index of the
/// call-time argument it stands for.
template<int I>
struct placeholder : boost::integral_constant<int, I>
{
    placeholder() { }
};

// _1 refers to call-time argument 0, _2 to call-time argument 1
placeholder<0> const _1;
placeholder<1> const _2;

} // end placeholders namespace

/// Meta-function returning \c true if \c T is a placeholder type.
template<class T>
struct is_placeholder : boost::false_type
{
};

/// \internal_
template<int I>
struct is_placeholder<placeholders::placeholder<I> > : boost::true_type
{
};

namespace detail {

// Result of calling a bound_function: the wrapped function, the tuple of
// arguments given to bind() (which may contain placeholders) and the tuple
// of arguments supplied at the call.
template<class Function, class BoundArgs, class Args>
struct invoked_bound_function
{
    invoked_bound_function(Function f, BoundArgs bound_args, Args args)
        : m_function(f),
          m_bound_args(bound_args),
          m_args(args)
    {
    }

    // meta-function returning true if the N'th argument is a placeholder
    template<int N>
    struct is_placeholder_arg
    {
        typedef typename boost::tuples::element<N, BoundArgs>::type nth_bound_arg;

        typedef typename is_placeholder<nth_bound_arg>::type type;
        static const bool value = is_placeholder<nth_bound_arg>::value;
    };

    // meta-function mapping a bound argument type to the type actually
    // passed to the function: non-placeholders map to themselves ...
    template<class Arg>
    struct get_arg_type
    {
        typedef Arg type;
    };

    // ... and placeholder<I> maps to the type of the I'th call-time argument
    template<int I>
    struct get_arg_type<placeholders::placeholder<I> >
    {
        typedef typename boost::tuples::element<I, Args>::type type;
    };

    // meta-function returning the type of the N'th argument when invoked
    template<int N>
    struct get_nth_arg_type
    {
        typedef typename boost::tuples::element<N, BoundArgs>::type nth_bound_arg;

        typedef typename get_arg_type<nth_bound_arg>::type type;
    };

    // overload selected when the N'th bound argument is a placeholder:
    // returns the call-time argument at the placeholder's index
    template<int N>
    typename get_nth_arg_type<N>::type get_nth_arg(
        typename boost::enable_if_c<is_placeholder_arg<N>::value>::type* = 0
    ) const
    {
        typedef typename boost::tuples::element<N, BoundArgs>::type nth_bound_arg;

        return boost::get<nth_bound_arg::value>(m_args);
    }

    // overload selected when the N'th bound argument is not a placeholder:
    // returns the value stored by bind()
    template<int N>
    typename get_nth_arg_type<N>::type get_nth_arg(
        typename boost::disable_if_c<is_placeholder_arg<N>::value>::type* = 0
    ) const
    {
        return boost::get<N>(m_bound_args);
    }

    Function m_function;
    BoundArgs m_bound_args;
    Args m_args;
};

// The three overloads below are selected by the number of bound arguments
// (1, 2 or 3). Each resolves every bound argument with get_nth_arg<>(),
// invokes the wrapped function with them and streams the result into the
// meta_kernel.
template<class Function, class BoundArgs, class Args>
inline meta_kernel& apply_invoked_bound_function(
    meta_kernel &k,
    const invoked_bound_function<Function, BoundArgs, Args> &expr,
    typename boost::enable_if_c<
        boost::tuples::length<BoundArgs>::value == 1
    >::type* = 0
)
{
    return k << expr.m_function(expr.template get_nth_arg<0>());
}

template<class Function, class BoundArgs, class Args>
inline meta_kernel& apply_invoked_bound_function(
    meta_kernel &k,
    const invoked_bound_function<Function, BoundArgs, Args> &expr,
    typename boost::enable_if_c<
        boost::tuples::length<BoundArgs>::value == 2
    >::type* = 0
)
{
    return k << expr.m_function(expr.template get_nth_arg<0>(),
                                expr.template get_nth_arg<1>());
}

template<class Function, class BoundArgs, class Args>
inline meta_kernel& apply_invoked_bound_function(
    meta_kernel &k,
    const invoked_bound_function<Function, BoundArgs, Args> &expr,
    typename boost::enable_if_c<
        boost::tuples::length<BoundArgs>::value == 3
    >::type* = 0
)
{
    return k << expr.m_function(expr.template get_nth_arg<0>(),
                                expr.template get_nth_arg<1>(),
                                expr.template get_nth_arg<2>());
}

// streams an invoked bound function into a meta_kernel by dispatching to
// the apply_invoked_bound_function() overload matching the bound arity
template<class Function, class BoundArgs, class Args>
inline meta_kernel& operator<<(
    meta_kernel &k,
    const invoked_bound_function<Function, BoundArgs, Args> &expr
)
{
    return apply_invoked_bound_function(k, expr);
}

// Function object returned by bind(): stores the function and the bound
// arguments; calling it with one or two arguments packages everything into
// an invoked_bound_function.
template<class Function, class BoundArgs>
struct bound_function
{
    // NOTE(review): result_type is hard-coded to int regardless of
    // Function - confirm whether callers rely on this.
    typedef int result_type;

    bound_function(Function f, BoundArgs args)
        : m_function(f),
          m_args(args)
    {
    }

    template<class Arg1>
    detail::invoked_bound_function<
        Function,
        BoundArgs,
        boost::tuple<Arg1>
    >
    operator()(const Arg1 &arg1) const
    {
        return detail::invoked_bound_function<
                   Function,
                   BoundArgs,
                   boost::tuple<Arg1>
               >(m_function, m_args, boost::make_tuple(arg1));
    }

    template<class Arg1, class Arg2>
    detail::invoked_bound_function<
        Function,
        BoundArgs,
        boost::tuple<Arg1, Arg2>
    >
    operator()(const Arg1 &arg1, const Arg2 &arg2) const
    {
        return detail::invoked_bound_function<
                   Function,
                   BoundArgs,
                   boost::tuple<Arg1, Arg2>
               >(m_function, m_args, boost::make_tuple(arg1, arg2));
    }

    Function m_function;
    BoundArgs m_args;
};

} // end detail namespace

#if !defined(BOOST_COMPUTE_NO_VARIADIC_TEMPLATES) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
/// Returns a function wrapper which invokes \p f with \p args when called.
///
/// For example, to generate a unary function object which returns \c true
/// when its argument is less than \c 7:
/// \code
/// using boost::compute::less;
/// using boost::compute::placeholders::_1;
///
/// auto less_than_seven = boost::compute::bind(less<int>(), _1, 7);
/// \endcode
template<class F, class... Args>
inline detail::bound_function<F, boost::tuple<Args...> >
bind(F f, Args... args)
{
    typedef typename boost::tuple<Args...> ArgsTuple;

    return detail::bound_function<F, ArgsTuple>(f, boost::make_tuple(args...));
}
#else
// fallback overloads for one, two and three bound arguments, used when
// BOOST_COMPUTE_NO_VARIADIC_TEMPLATES is defined
template<class F, class A1>
inline detail::bound_function<F, boost::tuple<A1> >
bind(F f, A1 a1)
{
    typedef typename boost::tuple<A1> Args;

    return detail::bound_function<F, Args>(f, boost::make_tuple(a1));
}

template<class F, class A1, class A2>
inline detail::bound_function<F, boost::tuple<A1, A2> >
bind(F f, A1 a1, A2 a2)
{
    typedef typename boost::tuple<A1, A2> Args;

    return detail::bound_function<F, Args>(f, boost::make_tuple(a1, a2));
}

template<class F, class A1, class A2, class A3>
inline detail::bound_function<F, boost::tuple<A1, A2, A3> >
bind(F f, A1 a1, A2 a2, A3 a3)
{
    typedef typename boost::tuple<A1, A2, A3> Args;

    return detail::bound_function<F, Args>(f, boost::make_tuple(a1, a2, a3));
}
#endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES

} // end compute namespace
} // end boost namespace

#endif // BOOST_COMPUTE_FUNCTIONAL_BIND_HPP
diff --git a/boost/compute/functional/common.hpp b/boost/compute/functional/common.hpp new file mode 100644 index 0000000000..9ad8b43502 --- /dev/null +++ b/boost/compute/functional/common.hpp @@ -0,0 +1,29 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_FUNCTIONAL_COMMON_HPP +#define BOOST_COMPUTE_FUNCTIONAL_COMMON_HPP + +#include <boost/compute/functional/detail/macros.hpp> + +namespace boost { +namespace compute { + +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(clamp, T (T, T, T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(degrees, T (T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(radians, T (T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(sign, T (T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(smoothstep, T (T, T, T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(step, T (T, T), class T) + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_FUNCTIONAL_COMMON_HPP diff --git a/boost/compute/functional/convert.hpp b/boost/compute/functional/convert.hpp new file mode 100644 index 0000000000..f182e8ec72 --- /dev/null +++ b/boost/compute/functional/convert.hpp @@ -0,0 +1,51 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_FUNCTIONAL_CONVERT_HPP +#define BOOST_COMPUTE_FUNCTIONAL_CONVERT_HPP + +namespace boost { +namespace compute { +namespace detail { + +template<class T, class Arg> +struct invoked_convert +{ + invoked_convert(const Arg &arg) + : m_arg(arg) + { + } + + Arg m_arg; +}; + +} // end detail namespace + +/// The \ref convert function converts its argument to type \c T (similar to +/// static_cast<T>). 
+/// +/// \see \ref as "as<T>" +template<class T> +struct convert +{ + typedef T result_type; + + /// \internal_ + template<class Arg> + detail::invoked_convert<T, Arg> operator()(const Arg &arg) const + { + return detail::invoked_convert<T, Arg>(arg); + } +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_FUNCTIONAL_CONVERT_HPP diff --git a/boost/compute/functional/detail/macros.hpp b/boost/compute/functional/detail/macros.hpp new file mode 100644 index 0000000000..71ae3722e5 --- /dev/null +++ b/boost/compute/functional/detail/macros.hpp @@ -0,0 +1,35 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_FUNCTIONAL_MACROS_HPP +#define BOOST_COMPUTE_FUNCTIONAL_MACROS_HPP + +#include <boost/preprocessor/cat.hpp> +#include <boost/preprocessor/stringize.hpp> + +#include <boost/compute/function.hpp> + +#define BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(name, signature, template_args) \ + template<template_args> \ + class name : public function<signature> \ + { \ + public: \ + (name)() : function<signature>(BOOST_PP_STRINGIZE(name)) { } \ + }; + +#define BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(name, signature, template_args) \ + template<template_args> \ + class BOOST_PP_CAT(name, _) : public function<signature> \ + { \ + public: \ + BOOST_PP_CAT(name, _)() : function<signature>(BOOST_PP_STRINGIZE(name)) { } \ + }; + +#endif // BOOST_COMPUTE_FUNCTIONAL_MACROS_HPP diff --git a/boost/compute/functional/detail/nvidia_ballot.hpp b/boost/compute/functional/detail/nvidia_ballot.hpp new file mode 100644 index 
0000000000..cf66828f1c --- /dev/null +++ b/boost/compute/functional/detail/nvidia_ballot.hpp @@ -0,0 +1,48 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_FUNCTIONAL_DETAIL_NVIDIA_BALLOT_HPP +#define BOOST_COMPUTE_FUNCTIONAL_DETAIL_NVIDIA_BALLOT_HPP + +#include <boost/compute/function.hpp> +#include <boost/compute/types/fundamental.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class T> +class nvidia_ballot : public function<uint_(T)> +{ +public: + nvidia_ballot() + : function<uint_(T)>("nvidia_ballot") + { + this->set_source( + "inline uint nvidia_ballot(const uint x)\n" + "{\n" + " uint result;\n" + " asm volatile(\n" + " \"setp.ne.u32 %%p1, %1, 0;\"\n" + " \"vote.ballot.b32 %0, %%p1;\"\n" + " : \"=r\"(result)\n" + " : \"r\"(x)\n" + " );\n" + " return result;\n" + "}\n" + ); + } +}; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_FUNCTIONAL_DETAIL_NVIDIA_BALLOT_HPP diff --git a/boost/compute/functional/detail/nvidia_popcount.hpp b/boost/compute/functional/detail/nvidia_popcount.hpp new file mode 100644 index 0000000000..b042ea4ba9 --- /dev/null +++ b/boost/compute/functional/detail/nvidia_popcount.hpp @@ -0,0 +1,42 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See 
http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_FUNCTIONAL_DETAIL_NVIDIA_POPCOUNT_HPP +#define BOOST_COMPUTE_FUNCTIONAL_DETAIL_NVIDIA_POPCOUNT_HPP + +#include <boost/compute/function.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class T> +class nvidia_popcount : public function<T(T)> +{ +public: + nvidia_popcount() + : function<T(T)>("nvidia_popcount") + { + this->set_source( + "inline uint nvidia_popcount(const uint x)\n" + "{\n" + " uint count;\n" + " asm(\"popc.b32 %0, %1;\" : \"=r\"(count) : \"r\"(x));\n" + " return count;\n" + "}\n" + ); + } +}; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_FUNCTIONAL_DETAIL_NVIDIA_POPCOUNT_HPP diff --git a/boost/compute/functional/detail/unpack.hpp b/boost/compute/functional/detail/unpack.hpp new file mode 100644 index 0000000000..e64672f142 --- /dev/null +++ b/boost/compute/functional/detail/unpack.hpp @@ -0,0 +1,143 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_FUNCTIONAL_DETAIL_UNPACK_HPP +#define BOOST_COMPUTE_FUNCTIONAL_DETAIL_UNPACK_HPP + +#include <boost/compute/functional/get.hpp> +#include <boost/compute/type_traits/is_vector_type.hpp> +#include <boost/compute/type_traits/result_of.hpp> +#include <boost/compute/type_traits/vector_size.hpp> +#include <boost/compute/detail/meta_kernel.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class Function, class Arg, size_t Arity> +struct invoked_unpacked +{ + invoked_unpacked(const Function &f, const Arg &arg) + : m_function(f), + m_arg(arg) + { + } + + Function m_function; + Arg m_arg; +}; + +template<class Function, class Arg, size_t Arity> +inline meta_kernel& operator<<(meta_kernel &k, const invoked_unpacked<Function, Arg, Arity> &expr); + +template<class Function, class Arg> +inline meta_kernel& operator<<(meta_kernel &k, const invoked_unpacked<Function, Arg, 1> &expr) +{ + return k << expr.m_function(get<0>()(expr.m_arg)); +} + +template<class Function, class Arg> +inline meta_kernel& operator<<(meta_kernel &k, const invoked_unpacked<Function, Arg, 2> &expr) +{ + return k << expr.m_function(get<0>()(expr.m_arg), get<1>()(expr.m_arg)); +} + +template<class Function, class Arg> +inline meta_kernel& operator<<(meta_kernel &k, const invoked_unpacked<Function, Arg, 3> &expr) +{ + return k << expr.m_function(get<0>()(expr.m_arg), get<1>()(expr.m_arg), get<2>()(expr.m_arg)); +} + +template<class Function> +struct unpacked +{ + template<class T, class Enable = void> + struct aggregate_length + { + BOOST_STATIC_CONSTANT(size_t, value = boost::tuples::length<T>::value); + }; + + template<class T> + struct aggregate_length<T, typename enable_if<is_vector_type<T> >::type> + { + BOOST_STATIC_CONSTANT(size_t, value = vector_size<T>::value); + }; + + template<class TupleArg, size_t TupleSize> + struct result_impl {}; + + template<class TupleArg> + 
struct result_impl<TupleArg, 1> + { + typedef typename detail::get_result_type<0, TupleArg>::type T1; + + typedef typename boost::compute::result_of<Function(T1)>::type type; + }; + + template<class TupleArg> + struct result_impl<TupleArg, 2> + { + typedef typename detail::get_result_type<0, TupleArg>::type T1; + typedef typename detail::get_result_type<1, TupleArg>::type T2; + + typedef typename boost::compute::result_of<Function(T1, T2)>::type type; + }; + + template<class TupleArg> + struct result_impl<TupleArg, 3> + { + typedef typename detail::get_result_type<0, TupleArg>::type T1; + typedef typename detail::get_result_type<1, TupleArg>::type T2; + typedef typename detail::get_result_type<2, TupleArg>::type T3; + + typedef typename boost::compute::result_of<Function(T1, T2, T3)>::type type; + }; + + template<class Signature> + struct result {}; + + template<class This, class Arg> + struct result<This(Arg)> + { + typedef typename result_impl<Arg, aggregate_length<Arg>::value>::type type; + }; + + unpacked(const Function &f) + : m_function(f) + { + } + + template<class Arg> + detail::invoked_unpacked< + Function, Arg, aggregate_length<typename Arg::result_type>::value + > + operator()(const Arg &arg) const + { + return detail::invoked_unpacked< + Function, + Arg, + aggregate_length<typename Arg::result_type>::value + >(m_function, arg); + } + + Function m_function; +}; + +template<class Function> +inline unpacked<Function> unpack(const Function &f) +{ + return unpacked<Function>(f); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_FUNCTIONAL_DETAIL_UNPACK_HPP diff --git a/boost/compute/functional/field.hpp b/boost/compute/functional/field.hpp new file mode 100644 index 0000000000..cd9d81541c --- /dev/null +++ b/boost/compute/functional/field.hpp @@ -0,0 +1,86 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> 
+// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_FUNCTIONAL_FIELD_HPP +#define BOOST_COMPUTE_FUNCTIONAL_FIELD_HPP + +#include <string> + +namespace boost { +namespace compute { +namespace detail { + +template<class T, class Arg> +struct invoked_field +{ + typedef T result_type; + + invoked_field(const Arg &arg, const std::string &field) + : m_arg(arg), + m_field(field) + { + } + + Arg m_arg; + std::string m_field; +}; + +} // end detail namespace + +/// Returns the named field from a value. +/// +/// The template-type \c T specifies the field's value type. Note +/// that the value type must match the actual type of the field +/// otherwise runtime compilation or logic errors may occur. +/// +/// For example, to access the \c second field in a +/// \c std::pair<int, float> object: +/// \code +/// field<float>("second"); +/// \endcode +/// +/// This can also be used with vector types to access individual +/// components as well as perform swizzle operations. +/// +/// For example, to access the first and third components of an +/// \c int vector type (e.g. \c int4): +/// \code +/// field<int2_>("xz"); +/// \endcode +/// +/// \see \ref get "get<N>" +template<class T> +class field +{ +public: + /// Result type. + typedef T result_type; + + /// Creates a new field functor with \p field. 
+ field(const std::string &field) + : m_field(field) + { + } + + /// \internal_ + template<class Arg> + detail::invoked_field<T, Arg> operator()(const Arg &arg) const + { + return detail::invoked_field<T, Arg>(arg, m_field); + } + +private: + std::string m_field; +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_FUNCTIONAL_FIELD_HPP diff --git a/boost/compute/functional/geometry.hpp b/boost/compute/functional/geometry.hpp new file mode 100644 index 0000000000..ea37c02bfe --- /dev/null +++ b/boost/compute/functional/geometry.hpp @@ -0,0 +1,32 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_FUNCTIONAL_GEOMETRY_HPP +#define BOOST_COMPUTE_FUNCTIONAL_GEOMETRY_HPP + +#include <boost/compute/type_traits.hpp> +#include <boost/compute/functional/detail/macros.hpp> + +namespace boost { +namespace compute { + +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(cross, T (T, T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(dot, typename scalar_type<T>::type (T, T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(distance, typename scalar_type<T>::type (T, T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fast_distance, typename scalar_type<T>::type (T, T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(length, typename scalar_type<T>::type (T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fast_length, typename scalar_type<T>::type (T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(normalize, T (T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fast_normalize, T (T), class T) + +} // end compute namespace +} // end 
boost namespace + +#endif // BOOST_COMPUTE_FUNCTIONAL_GEOMETRY_HPP diff --git a/boost/compute/functional/get.hpp b/boost/compute/functional/get.hpp new file mode 100644 index 0000000000..2d3b7a489c --- /dev/null +++ b/boost/compute/functional/get.hpp @@ -0,0 +1,76 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_FUNCTIONAL_GET_HPP +#define BOOST_COMPUTE_FUNCTIONAL_GET_HPP + +#include <cstddef> + +#include <boost/compute/types/fundamental.hpp> +#include <boost/compute/type_traits/scalar_type.hpp> + +namespace boost { +namespace compute { +namespace detail { + +// meta-function returning the result type for get<N>() +template<size_t N, class Arg> +struct get_result_type +{ + typedef typename scalar_type<Arg>::type type; +}; + +template<size_t N, class Arg, class T> +struct invoked_get +{ + typedef typename get_result_type<N, T>::type result_type; + + invoked_get(const Arg &arg) + : m_arg(arg) + { + } + + Arg m_arg; +}; + +} // end detail namespace + +/// Returns the \c N'th element of an aggregate type (e.g. scalarN, +/// pair, tuple, etc.). 
+/// +/// \see \ref field "field<T>" +template<size_t N> +struct get +{ + /// \internal_ + template<class> struct result; + + /// \internal_ + template<class F, class Arg> + struct result<F(Arg)> + { + typedef typename detail::get_result_type<N, Arg>::type type; + }; + + template<class Arg> + detail::invoked_get< + N, Arg, typename boost::remove_cv<typename Arg::result_type>::type + > operator()(const Arg &arg) const + { + typedef typename boost::remove_cv<typename Arg::result_type>::type T; + + return detail::invoked_get<N, Arg, T>(arg); + } +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_FUNCTIONAL_GET_HPP diff --git a/boost/compute/functional/hash.hpp b/boost/compute/functional/hash.hpp new file mode 100644 index 0000000000..830c422fdb --- /dev/null +++ b/boost/compute/functional/hash.hpp @@ -0,0 +1,91 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_FUNCTIONAL_HASH_HPP
+#define BOOST_COMPUTE_FUNCTIONAL_HASH_HPP
+
+#include <sstream>
+#include <string>
+
+#include <boost/compute/function.hpp>
+#include <boost/compute/types/fundamental.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+// Returns the name used for the generated hash function for \c Key:
+// the prefix "boost_hash_" followed by the result of type_name<Key>().
+template<class Key>
+inline std::string make_hash_function_name()
+{
+    return std::string("boost_hash_") + type_name<Key>();
+}
+
+// Returns the source text of the hash function for \c Key. The generated
+// function takes one \c Key argument, is named by
+// make_hash_function_name<Key>() and returns a ulong.
+template<class Key>
+inline std::string make_hash_function_source()
+{
+    std::stringstream source;
+    source << "inline ulong " << make_hash_function_name<Key>()
+           << "(const " << type_name<Key>() << " x)\n"
+           << "{\n"
+           // note we reinterpret the argument as a 32-bit uint and
+           // then promote it to a 64-bit ulong for the result type
+           << " ulong a = as_uint(x);\n"
+           << " a = (a ^ 61) ^ (a >> 16);\n"
+           << " a = a + (a << 3);\n"
+           << " a = a ^ (a >> 4);\n"
+           << " a = a * 0x27d4eb2d;\n"
+           << " a = a ^ (a >> 15);\n"
+           << " return a;\n"
+           << "}\n";
+    return source.str();
+}
+
+// Common implementation for the hash<Key> specializations: wraps the
+// function built from make_hash_function_source<Key>() and forwards
+// invocations to it.
+template<class Key>
+struct hash_impl
+{
+    typedef Key argument_type;
+    typedef ulong_ result_type;
+
+    // Builds the wrapped function directly from its generated name and
+    // source instead of constructing a placeholder and re-assigning it.
+    hash_impl()
+        : m_function(
+              make_function_from_source<result_type(argument_type)>(
+                  make_hash_function_name<argument_type>(),
+                  make_hash_function_source<argument_type>()
+              )
+          )
+    {
+    }
+
+    // Returns the expression object produced by invoking the wrapped
+    // function with \p arg.
+    template<class Arg>
+    invoked_function<result_type, boost::tuple<Arg> >
+    operator()(const Arg &arg) const
+    {
+        return m_function(arg);
+    }
+
+    function<result_type(argument_type)> m_function;
+};
+
+} // end detail namespace
+
+/// The hash function returns a hash value for the input value.
+///
+/// The return type is \c ulong_ (the OpenCL unsigned long type).
+template<class Key> struct hash; + +/// \internal_ +template<> struct hash<int_> : detail::hash_impl<int_> { }; + +/// \internal_ +template<> struct hash<uint_> : detail::hash_impl<uint_> { }; + +/// \internal_ +template<> struct hash<float_> : detail::hash_impl<float_> { }; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_FUNCTIONAL_HASH_HPP diff --git a/boost/compute/functional/identity.hpp b/boost/compute/functional/identity.hpp new file mode 100644 index 0000000000..72740d9788 --- /dev/null +++ b/boost/compute/functional/identity.hpp @@ -0,0 +1,64 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_FUNCTIONAL_IDENTITY_HPP +#define BOOST_COMPUTE_FUNCTIONAL_IDENTITY_HPP + +namespace boost { +namespace compute { +namespace detail { + +template<class T, class Arg> +struct invoked_identity +{ + typedef T result_type; + + invoked_identity(const Arg &arg) + : m_arg(arg) + { + } + + Arg m_arg; +}; + +} // end detail namespace + +/// Identity function which simply returns its input. +/// +/// For example, to directly copy values using the transform() algorithm: +/// \code +/// transform(input.begin(), input.end(), output.begin(), identity<int>(), queue); +/// \endcode +/// +/// \see \ref as "as<T>", \ref convert "convert<T>" +template<class T> +class identity +{ +public: + /// Identity function result type. + typedef T result_type; + + /// Creates a new identity function. 
+ identity() + { + } + + /// \internal_ + template<class Arg> + detail::invoked_identity<T, Arg> operator()(const Arg &arg) const + { + return detail::invoked_identity<T, Arg>(arg); + } +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_FUNCTIONAL_IDENTITY_HPP diff --git a/boost/compute/functional/integer.hpp b/boost/compute/functional/integer.hpp new file mode 100644 index 0000000000..8ff6c2bd4a --- /dev/null +++ b/boost/compute/functional/integer.hpp @@ -0,0 +1,30 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_FUNCTIONAL_INTEGER_HPP +#define BOOST_COMPUTE_FUNCTIONAL_INTEGER_HPP + +#include <boost/compute/functional/detail/macros.hpp> + +namespace boost { +namespace compute { + +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(abs, T (T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(abs_diff, T (T, T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(add_sat, T (T, T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(hadd, T (T, T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(rhadd, T (T, T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(max, T (T, T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(min, T (T, T), class T) + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_FUNCTIONAL_INTEGER_HPP diff --git a/boost/compute/functional/logical.hpp b/boost/compute/functional/logical.hpp new file mode 100644 index 0000000000..2e2c7518b5 --- /dev/null +++ b/boost/compute/functional/logical.hpp @@ -0,0 +1,208 @@ 
+//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_FUNCTIONAL_LOGICAL_HPP +#define BOOST_COMPUTE_FUNCTIONAL_LOGICAL_HPP + +namespace boost { +namespace compute { +namespace detail { + +template<class Predicate, class Expr> +class invoked_unary_negate_function +{ +public: + typedef int result_type; + + invoked_unary_negate_function(const Predicate &pred, + const Expr &expr) + : m_pred(pred), + m_expr(expr) + { + } + + Predicate pred() const + { + return m_pred; + } + + Expr expr() const + { + return m_expr; + } + +private: + Predicate m_pred; + Expr m_expr; +}; + +template<class Predicate, class Expr1, class Expr2> +class invoked_binary_negate_function +{ +public: + typedef int result_type; + + invoked_binary_negate_function(const Predicate &pred, + const Expr1 &expr1, + const Expr2 &expr2) + : m_pred(pred), + m_expr1(expr1), + m_expr2(expr2) + { + } + + Predicate pred() const + { + return m_pred; + } + + Expr1 expr1() const + { + return m_expr1; + } + + Expr2 expr2() const + { + return m_expr2; + } + +private: + Predicate m_pred; + Expr1 m_expr1; + Expr2 m_expr2; +}; + +} // end detail namespace + +/// \internal_ +template<class Arg, class Result> +struct unary_function +{ + typedef Arg argument_type; + typedef Result result_type; +}; + +/// \internal_ +template<class Arg1, class Arg2, class Result> +struct binary_function +{ + typedef Arg1 first_argument_type; + typedef Arg2 second_argument_type; + typedef Result result_type; +}; + +/// \internal_ +template<class Arg1, class Arg2, class Arg3, class Result> +struct ternary_function +{ + typedef Arg1 
first_argument_type; + typedef Arg2 second_argument_type; + typedef Arg3 third_argument_type; + typedef Result result_type; +}; + +/// The unary_negate function adaptor negates a unary function. +/// +/// \see not1() +template<class Predicate> +class unary_negate : public unary_function<void, int> +{ +public: + explicit unary_negate(Predicate pred) + : m_pred(pred) + { + } + + /// \internal_ + template<class Arg> + detail::invoked_unary_negate_function<Predicate, Arg> + operator()(const Arg &arg) const + { + return detail::invoked_unary_negate_function< + Predicate, + Arg + >(m_pred, arg); + } + +private: + Predicate m_pred; +}; + +/// The binary_negate function adaptor negates a binary function. +/// +/// \see not2() +template<class Predicate> +class binary_negate : public binary_function<void, void, int> +{ +public: + explicit binary_negate(Predicate pred) + : m_pred(pred) + { + } + + /// \internal_ + template<class Arg1, class Arg2> + detail::invoked_binary_negate_function<Predicate, Arg1, Arg2> + operator()(const Arg1 &arg1, const Arg2 &arg2) const + { + return detail::invoked_binary_negate_function< + Predicate, + Arg1, + Arg2 + >(m_pred, arg1, arg2); + } + +private: + Predicate m_pred; +}; + +/// Returns a unary_negate adaptor around \p predicate. +/// +/// \param predicate the unary function to wrap +/// +/// \return a unary_negate wrapper around \p predicate +template<class Predicate> +inline unary_negate<Predicate> not1(const Predicate &predicate) +{ + return unary_negate<Predicate>(predicate); +} + +/// Returns a binary_negate adaptor around \p predicate. +/// +/// \param predicate the binary function to wrap +/// +/// \return a binary_negate wrapper around \p predicate +template<class Predicate> +inline binary_negate<Predicate> not2(const Predicate &predicate) +{ + return binary_negate<Predicate>(predicate); +} + +/// The logical_not function negates its argument and returns it. 
+/// +/// \see not1(), not2() +template<class T> +struct logical_not : public unary_function<T, int> +{ + /// \internal_ + template<class Expr> + detail::invoked_function<int, boost::tuple<Expr> > + operator()(const Expr &expr) const + { + return detail::invoked_function<int, boost::tuple<Expr> >( + "!", std::string(), boost::make_tuple(expr) + ); + } +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_FUNCTIONAL_LOGICAL_HPP diff --git a/boost/compute/functional/math.hpp b/boost/compute/functional/math.hpp new file mode 100644 index 0000000000..6dea05f6f2 --- /dev/null +++ b/boost/compute/functional/math.hpp @@ -0,0 +1,80 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_FUNCTIONAL_MATH_HPP
+#define BOOST_COMPUTE_FUNCTIONAL_MATH_HPP
+
+#include <boost/compute/functional/detail/macros.hpp>
+
+namespace boost {
+namespace compute {
+
+// Each line below declares a class template named after a built-in math
+// function. The second macro argument is the function signature: result
+// type followed by one parameter type per argument the built-in takes.
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(acos, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(acosh, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(acospi, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(asin, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(asinh, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(asinpi, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(atan, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(atan2, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(atanh, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(atanpi, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(atan2pi, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(cbrt, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(ceil, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(copysign, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(cos, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(cosh, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(cospi, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(erf, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(erfc, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(exp, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(exp2, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(exp10, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(expm1, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fabs, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fdim, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(floor, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fma, T (T, T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fmax, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fmin, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(fmod, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(hypot, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(ilogb, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(lgamma, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(log, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(log2, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(log10, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(log1p, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(logb, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(mad, T (T, T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(nextafter, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(pow, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(pown, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(powr, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(remainder, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(rint, T (T), class T)
+// rootn(x, y) takes two arguments; declared like pown above.
+// NOTE(review): OpenCL specifies an integer second argument for both.
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(rootn, T (T, T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(round, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(rsqrt, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(sin, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(sinh, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(sinpi, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(sqrt, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(tan, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(tanh, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(tanpi, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(tgamma, T (T), class T)
+BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(trunc, T (T), class T)
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // 
BOOST_COMPUTE_FUNCTIONAL_MATH_HPP diff --git a/boost/compute/functional/operator.hpp b/boost/compute/functional/operator.hpp new file mode 100644 index 0000000000..908372a326 --- /dev/null +++ b/boost/compute/functional/operator.hpp @@ -0,0 +1,100 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_FUNCTIONAL_OPERATORS_HPP +#define BOOST_COMPUTE_FUNCTIONAL_OPERATORS_HPP + +#include <string> + +namespace boost { +namespace compute { +namespace detail { + +template<class Expr1, class Expr2, class Result> +struct invoked_binary_operator +{ + typedef Result result_type; + + invoked_binary_operator(const std::string &op, + const Expr1 &arg1, + const Expr2 &arg2) + : m_op(op), + m_expr1(arg1), + m_expr2(arg2) + { + } + + std::string op() const + { + return m_op; + } + + Expr1 arg1() const + { + return m_expr1; + } + + Expr2 arg2() const + { + return m_expr2; + } + + std::string m_op; + Expr1 m_expr1; + Expr2 m_expr2; +}; + +} // end detail namespace + +/// \internal_ +#define BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(name, op, return_type, arg_type) \ + template<class arg_type> \ + class name : public function<return_type (arg_type, arg_type)> \ + { \ + public: \ + name() : function<return_type (arg_type, arg_type)>(BOOST_PP_STRINGIZE(name)) { } \ + \ + template<class Arg1, class Arg2> \ + detail::invoked_binary_operator<Arg1, Arg2, T> \ + operator()(const Arg1 &x, const Arg2 &y) const \ + { \ + return detail::invoked_binary_operator<Arg1, Arg2, T>(op, x, y); \ + } \ + }; + +// arithmetic operations +BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(plus, "+", T, T) 
+BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(minus, "-", T, T) +BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(multiplies, "*", T, T) +BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(divides, "/", T, T) +BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(modulus, "%", T, T) + +// comparisons +BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(equal_to, "==", T, T) +BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(not_equal_to, "!=", T, T) +BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(greater, ">", T, T) +BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(less, "<", T, T) +BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(greater_equal, ">=", T, T) +BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(less_equal, "<=", T, T) + +// logical operators +BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(logical_and, "&&", T, T) +BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(logical_or, "||", T, T) + +// bitwise operations +BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(bit_and, "&", T, T) +BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(bit_or, "|", T, T) +BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(bit_xor, "^", T, T) +BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(shift_left, "<<", T, T) +BOOST_COMPUTE_DECLARE_BINARY_OPERATOR(shift_right, ">>", T, T) + +} // end compute namespace +} // end boost namespace +#endif // BOOST_COMPUTE_FUNCTIONAL_OPERATORS_HPP diff --git a/boost/compute/functional/popcount.hpp b/boost/compute/functional/popcount.hpp new file mode 100644 index 0000000000..7326e7022f --- /dev/null +++ b/boost/compute/functional/popcount.hpp @@ -0,0 +1,55 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_FUNCTIONAL_POPCOUNT_HPP +#define BOOST_COMPUTE_FUNCTIONAL_POPCOUNT_HPP + +#include <boost/compute/function.hpp> +#include <boost/compute/type_traits/type_name.hpp> + +namespace boost { +namespace compute { + +/// Returns the number of non-zero bits in \p x. +/// +/// \see_opencl_ref{popcount} +template<class T> +class popcount : public function<T(T)> +{ +public: + popcount() + : function<T(T)>("boost_popcount") + { + std::stringstream s; + s << "inline " << type_name<T>() << " boost_popcount" + << "(const " << type_name<T>() << " x)\n" + << "{\n" + // use built-in popcount if opencl 1.2 is supported + << "#if __OPENCL_VERSION__ >= 120\n" + << " return popcount(x);\n" + // fallback to generic popcount() implementation + << "#else\n" + << " " << type_name<T>() << " count = 0;\n" + << " for(" << type_name<T>() << " i = 0; i < sizeof(i) * CHAR_BIT; i++){\n" + << " if(x & (" << type_name<T>() << ") 1 << i){\n" + << " count++;\n" + << " }\n" + << " }\n" + << " return count;\n" + << "#endif\n" + << "}\n"; + this->set_source(s.str()); + } +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_FUNCTIONAL_POPCOUNT_HPP diff --git a/boost/compute/functional/relational.hpp b/boost/compute/functional/relational.hpp new file mode 100644 index 0000000000..1a88052c05 --- /dev/null +++ b/boost/compute/functional/relational.hpp @@ -0,0 +1,39 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_FUNCTIONAL_RELATIONAL_HPP +#define BOOST_COMPUTE_FUNCTIONAL_RELATIONAL_HPP + +#include <boost/compute/functional/detail/macros.hpp> + +namespace boost { +namespace compute { + +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isequal, int (T, T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isnotequal, int (T, T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isgreater, int (T, T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isgreaterequal, int (T, T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isless, int (T, T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(islessequal, int (T, T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(islessgreater, int (T, T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isfinite, int (T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isinf, int (T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isnan, int (T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isnormal, int (T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isordered, int (T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(isunordered, int (T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION_UNDERSCORE(signbit, int (T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(any, int (T), class T) +BOOST_COMPUTE_DECLARE_BUILTIN_FUNCTION(all, int (T), class T) + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_FUNCTIONAL_RELATIONAL_HPP diff --git a/boost/compute/image.hpp b/boost/compute/image.hpp new file mode 100644 index 0000000000..1a7b9ca061 --- /dev/null +++ b/boost/compute/image.hpp @@ -0,0 +1,25 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under 
the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_IMAGE_HPP +#define BOOST_COMPUTE_IMAGE_HPP + +/// \file +/// +/// Meta-header to include all Boost.Compute image headers. + +#include <boost/compute/image/image1d.hpp> +#include <boost/compute/image/image2d.hpp> +#include <boost/compute/image/image3d.hpp> +#include <boost/compute/image/image_format.hpp> +#include <boost/compute/image/image_object.hpp> +#include <boost/compute/image/image_sampler.hpp> + +#endif // BOOST_COMPUTE_IMAGE_HPP diff --git a/boost/compute/image/image1d.hpp b/boost/compute/image/image1d.hpp new file mode 100644 index 0000000000..2d71934ab4 --- /dev/null +++ b/boost/compute/image/image1d.hpp @@ -0,0 +1,204 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_IMAGE_IMAGE1D_HPP +#define BOOST_COMPUTE_IMAGE_IMAGE1D_HPP + +#include <boost/throw_exception.hpp> + +#include <boost/compute/config.hpp> +#include <boost/compute/exception/opencl_error.hpp> +#include <boost/compute/image/image_format.hpp> +#include <boost/compute/image/image_object.hpp> +#include <boost/compute/type_traits/type_name.hpp> +#include <boost/compute/utility/extents.hpp> + +namespace boost { +namespace compute { + +// forward declarations +class command_queue; + +/// \class image1d +/// \brief An OpenCL 1D image object +/// +/// \opencl_version_warning{1,2} +/// +/// \see image_format, image2d +class image1d : public image_object +{ +public: + /// Creates a null image1d object. + image1d() + : image_object() + { + } + + /// Creates a new image1d object. + /// + /// \see_opencl_ref{clCreateImage} + image1d(const context &context, + size_t image_width, + const image_format &format, + cl_mem_flags flags = read_write, + void *host_ptr = 0) + { + #ifdef CL_VERSION_1_2 + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE1D; + desc.image_width = image_width; + desc.image_height = 1; + desc.image_depth = 1; + desc.image_array_size = 0; + desc.image_row_pitch = 0; + desc.image_slice_pitch = 0; + desc.num_mip_levels = 0; + desc.num_samples = 0; + #ifdef CL_VERSION_2_0 + desc.mem_object = 0; + #else + desc.buffer = 0; + #endif + + cl_int error = 0; + + m_mem = clCreateImage( + context, flags, format.get_format_ptr(), &desc, host_ptr, &error + ); + + if(!m_mem){ + BOOST_THROW_EXCEPTION(opencl_error(error)); + } + #else + // image1d objects are only supported in OpenCL 1.2 and later + BOOST_THROW_EXCEPTION(opencl_error(CL_IMAGE_FORMAT_NOT_SUPPORTED)); + #endif + } + + /// Creates a new image1d as a copy of \p other. + image1d(const image1d &other) + : image_object(other) + { + } + + /// Copies the image1d from \p other. 
+ image1d& operator=(const image1d &other) + { + image_object::operator=(other); + + return *this; + } + + #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES + /// Move-constructs a new image object from \p other. + image1d(image1d&& other) BOOST_NOEXCEPT + : image_object(std::move(other)) + { + } + + /// Move-assigns the image from \p other to \c *this. + image1d& operator=(image1d&& other) BOOST_NOEXCEPT + { + image_object::operator=(std::move(other)); + + return *this; + } + #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES + + /// Destroys the image1d object. + ~image1d() + { + } + + /// Returns the size (width) of the image. + extents<1> size() const + { + extents<1> size; + size[0] = get_info<size_t>(CL_IMAGE_WIDTH); + return size; + } + + /// Returns the origin of the image (\c 0). + extents<1> origin() const + { + return extents<1>(); + } + + /// Returns information about the image. + /// + /// \see_opencl_ref{clGetImageInfo} + template<class T> + T get_info(cl_image_info info) const + { + return get_image_info<T>(info); + } + + /// \overload + template<int Enum> + typename detail::get_object_info_type<image1d, Enum>::type + get_info() const; + + /// Returns the supported image formats for the context. + /// + /// \see_opencl_ref{clGetSupportedImageFormats} + static std::vector<image_format> + get_supported_formats(const context &context, cl_mem_flags flags = read_write) + { + #ifdef CL_VERSION_1_2 + return image_object::get_supported_formats(context, CL_MEM_OBJECT_IMAGE1D, flags); + #else + return std::vector<image_format>(); + #endif + } + + /// Returns \c true if \p format is a supported 1D image format for + /// \p context. 
+ static bool is_supported_format(const image_format &format, + const context &context, + cl_mem_flags flags = read_write) + { + #ifdef CL_VERSION_1_2 + return image_object::is_supported_format( + format, context, CL_MEM_OBJECT_IMAGE1D, flags + ); + #else + return false; + #endif + } + + /// Creates a new image with a copy of the data in \c *this. Uses \p queue + /// to perform the copy operation. + image1d clone(command_queue &queue) const; +}; + +/// \internal_ define get_info() specializations for image1d +BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(image1d, + ((cl_image_format, CL_IMAGE_FORMAT)) + ((size_t, CL_IMAGE_ELEMENT_SIZE)) + ((size_t, CL_IMAGE_ROW_PITCH)) + ((size_t, CL_IMAGE_SLICE_PITCH)) + ((size_t, CL_IMAGE_WIDTH)) + ((size_t, CL_IMAGE_HEIGHT)) + ((size_t, CL_IMAGE_DEPTH)) +) + +namespace detail { + +// set_kernel_arg() specialization for image1d +template<> +struct set_kernel_arg<image1d> : public set_kernel_arg<image_object> { }; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +BOOST_COMPUTE_TYPE_NAME(boost::compute::image1d, image1d_t) + +#endif // BOOST_COMPUTE_IMAGE_IMAGE1D_HPP diff --git a/boost/compute/image/image2d.hpp b/boost/compute/image/image2d.hpp new file mode 100644 index 0000000000..c203a9417f --- /dev/null +++ b/boost/compute/image/image2d.hpp @@ -0,0 +1,262 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_IMAGE_IMAGE2D_HPP +#define BOOST_COMPUTE_IMAGE_IMAGE2D_HPP + +#include <boost/throw_exception.hpp> + +#include <boost/compute/config.hpp> +#include <boost/compute/context.hpp> +#include <boost/compute/exception/opencl_error.hpp> +#include <boost/compute/image/image_format.hpp> +#include <boost/compute/image/image_object.hpp> +#include <boost/compute/detail/get_object_info.hpp> +#include <boost/compute/type_traits/type_name.hpp> +#include <boost/compute/utility/extents.hpp> + +namespace boost { +namespace compute { + +// forward declarations +class command_queue; + +/// \class image2d +/// \brief An OpenCL 2D image object +/// +/// For example, to create a 640x480 8-bit RGBA image: +/// +/// \snippet test/test_image2d.cpp create_image +/// +/// \see image_format, image3d +class image2d : public image_object +{ +public: + /// Creates a null image2d object. + image2d() + : image_object() + { + } + + /// Creates a new image2d object. 
+ /// + /// \see_opencl_ref{clCreateImage} + image2d(const context &context, + size_t image_width, + size_t image_height, + const image_format &format, + cl_mem_flags flags = read_write, + void *host_ptr = 0, + size_t image_row_pitch = 0) + { + cl_int error = 0; + + #ifdef CL_VERSION_1_2 + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = image_width; + desc.image_height = image_height; + desc.image_depth = 1; + desc.image_array_size = 0; + desc.image_row_pitch = image_row_pitch; + desc.image_slice_pitch = 0; + desc.num_mip_levels = 0; + desc.num_samples = 0; + #ifdef CL_VERSION_2_0 + desc.mem_object = 0; + #else + desc.buffer = 0; + #endif + + m_mem = clCreateImage(context, + flags, + format.get_format_ptr(), + &desc, + host_ptr, + &error); + #else + m_mem = clCreateImage2D(context, + flags, + format.get_format_ptr(), + image_width, + image_height, + image_row_pitch, + host_ptr, + &error); + #endif + + if(!m_mem){ + BOOST_THROW_EXCEPTION(opencl_error(error)); + } + } + + /// \internal_ (deprecated) + image2d(const context &context, + cl_mem_flags flags, + const image_format &format, + size_t image_width, + size_t image_height, + size_t image_row_pitch = 0, + void *host_ptr = 0) + { + cl_int error = 0; + + #ifdef CL_VERSION_1_2 + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = image_width; + desc.image_height = image_height; + desc.image_depth = 1; + desc.image_array_size = 0; + desc.image_row_pitch = image_row_pitch; + desc.image_slice_pitch = 0; + desc.num_mip_levels = 0; + desc.num_samples = 0; + #ifdef CL_VERSION_2_0 + desc.mem_object = 0; + #else + desc.buffer = 0; + #endif + + m_mem = clCreateImage(context, + flags, + format.get_format_ptr(), + &desc, + host_ptr, + &error); + #else + m_mem = clCreateImage2D(context, + flags, + format.get_format_ptr(), + image_width, + image_height, + image_row_pitch, + host_ptr, + &error); + #endif + + if(!m_mem){ + 
BOOST_THROW_EXCEPTION(opencl_error(error)); + } + } + + /// Creates a new image2d as a copy of \p other. + image2d(const image2d &other) + : image_object(other) + { + } + + /// Copies the image2d from \p other. + image2d& operator=(const image2d &other) + { + image_object::operator=(other); + + return *this; + } + + #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES + /// Move-constructs a new image object from \p other. + image2d(image2d&& other) BOOST_NOEXCEPT + : image_object(std::move(other)) + { + } + + /// Move-assigns the image from \p other to \c *this. + image2d& operator=(image2d&& other) BOOST_NOEXCEPT + { + image_object::operator=(std::move(other)); + + return *this; + } + #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES + + /// Destroys the image2d object. + ~image2d() + { + } + + /// Returns the size (width, height) of the image. + extents<2> size() const + { + extents<2> size; + size[0] = get_info<size_t>(CL_IMAGE_WIDTH); + size[1] = get_info<size_t>(CL_IMAGE_HEIGHT); + return size; + } + + /// Returns the origin of the image (\c 0, \c 0). + extents<2> origin() const + { + return extents<2>(); + } + + /// Returns information about the image. + /// + /// \see_opencl_ref{clGetImageInfo} + template<class T> + T get_info(cl_image_info info) const + { + return detail::get_object_info<T>(clGetImageInfo, m_mem, info); + } + + /// \overload + template<int Enum> + typename detail::get_object_info_type<image2d, Enum>::type + get_info() const; + + /// Returns the supported image formats for the context. + /// + /// \see_opencl_ref{clGetSupportedImageFormats} + static std::vector<image_format> + get_supported_formats(const context &context, cl_mem_flags flags = read_write) + { + return image_object::get_supported_formats(context, CL_MEM_OBJECT_IMAGE2D, flags); + } + + /// Returns \c true if \p format is a supported 2D image format for + /// \p context. 
+ static bool is_supported_format(const image_format &format, + const context &context, + cl_mem_flags flags = read_write) + { + return image_object::is_supported_format( + format, context, CL_MEM_OBJECT_IMAGE2D, flags + ); + } + + /// Creates a new image with a copy of the data in \c *this. Uses \p queue + /// to perform the copy operation. + image2d clone(command_queue &queue) const; +}; + +/// \internal_ define get_info() specializations for image2d +BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(image2d, + ((cl_image_format, CL_IMAGE_FORMAT)) + ((size_t, CL_IMAGE_ELEMENT_SIZE)) + ((size_t, CL_IMAGE_ROW_PITCH)) + ((size_t, CL_IMAGE_SLICE_PITCH)) + ((size_t, CL_IMAGE_WIDTH)) + ((size_t, CL_IMAGE_HEIGHT)) + ((size_t, CL_IMAGE_DEPTH)) +) + +namespace detail { + +// set_kernel_arg() specialization for image2d +template<> +struct set_kernel_arg<image2d> : public set_kernel_arg<image_object> { }; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +BOOST_COMPUTE_TYPE_NAME(boost::compute::image2d, image2d_t) + +#endif // BOOST_COMPUTE_IMAGE_IMAGE2D_HPP diff --git a/boost/compute/image/image3d.hpp b/boost/compute/image/image3d.hpp new file mode 100644 index 0000000000..9463cfaa16 --- /dev/null +++ b/boost/compute/image/image3d.hpp @@ -0,0 +1,265 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_IMAGE_IMAGE3D_HPP +#define BOOST_COMPUTE_IMAGE_IMAGE3D_HPP + +#include <boost/throw_exception.hpp> + +#include <boost/compute/detail/get_object_info.hpp> +#include <boost/compute/exception/opencl_error.hpp> +#include <boost/compute/image/image_format.hpp> +#include <boost/compute/image/image_object.hpp> +#include <boost/compute/type_traits/type_name.hpp> +#include <boost/compute/utility/extents.hpp> + +namespace boost { +namespace compute { + +// forward declarations +class command_queue; + +/// \class image3d +/// \brief An OpenCL 3D image object +/// +/// \see image_format, image2d +class image3d : public image_object +{ +public: + /// Creates a null image3d object. + image3d() + : image_object() + { + } + + /// Creates a new image3d object. + /// + /// \see_opencl_ref{clCreateImage} + image3d(const context &context, + size_t image_width, + size_t image_height, + size_t image_depth, + const image_format &format, + cl_mem_flags flags = read_write, + void *host_ptr = 0, + size_t image_row_pitch = 0, + size_t image_slice_pitch = 0) + { + cl_int error = 0; + + #ifdef CL_VERSION_1_2 + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE3D; + desc.image_width = image_width; + desc.image_height = image_height; + desc.image_depth = image_depth; + desc.image_array_size = 0; + desc.image_row_pitch = image_row_pitch; + desc.image_slice_pitch = image_slice_pitch; + desc.num_mip_levels = 0; + desc.num_samples = 0; + #ifdef CL_VERSION_2_0 + desc.mem_object = 0; + #else + desc.buffer = 0; + #endif + + m_mem = clCreateImage(context, + flags, + format.get_format_ptr(), + &desc, + host_ptr, + &error); + #else + m_mem = clCreateImage3D(context, + flags, + format.get_format_ptr(), + image_width, + image_height, + image_depth, + image_row_pitch, + image_slice_pitch, + host_ptr, + &error); + #endif + + if(!m_mem){ + BOOST_THROW_EXCEPTION(opencl_error(error)); + } + } + + 
/// \internal_ (deprecated) + image3d(const context &context, + cl_mem_flags flags, + const image_format &format, + size_t image_width, + size_t image_height, + size_t image_depth, + size_t image_row_pitch, + size_t image_slice_pitch = 0, + void *host_ptr = 0) + { + cl_int error = 0; + + #ifdef CL_VERSION_1_2 + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE3D; + desc.image_width = image_width; + desc.image_height = image_height; + desc.image_depth = image_depth; + desc.image_array_size = 0; + desc.image_row_pitch = image_row_pitch; + desc.image_slice_pitch = image_slice_pitch; + desc.num_mip_levels = 0; + desc.num_samples = 0; + #ifdef CL_VERSION_2_0 + desc.mem_object = 0; + #else + desc.buffer = 0; + #endif + + m_mem = clCreateImage(context, + flags, + format.get_format_ptr(), + &desc, + host_ptr, + &error); + #else + m_mem = clCreateImage3D(context, + flags, + format.get_format_ptr(), + image_width, + image_height, + image_depth, + image_row_pitch, + image_slice_pitch, + host_ptr, + &error); + #endif + + if(!m_mem){ + BOOST_THROW_EXCEPTION(opencl_error(error)); + } + } + + /// Creates a new image3d as a copy of \p other. + image3d(const image3d &other) + : image_object(other) + { + } + + /// Copies the image3d from \p other. + image3d& operator=(const image3d &other) + { + image_object::operator=(other); + + return *this; + } + + #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES + /// Move-constructs a new image object from \p other. + image3d(image3d&& other) BOOST_NOEXCEPT + : image_object(std::move(other)) + { + } + + /// Move-assigns the image from \p other to \c *this. + image3d& operator=(image3d&& other) BOOST_NOEXCEPT + { + image_object::operator=(std::move(other)); + + return *this; + } + #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES + + /// Destroys the image3d object. + ~image3d() + { + } + + /// Returns the size (width, height, depth) of the image. 
+ extents<3> size() const + { + extents<3> size; + size[0] = get_info<size_t>(CL_IMAGE_WIDTH); + size[1] = get_info<size_t>(CL_IMAGE_HEIGHT); + size[2] = get_info<size_t>(CL_IMAGE_DEPTH); + return size; + } + + /// Returns the origin of the image (\c 0, \c 0, \c 0). + extents<3> origin() const + { + return extents<3>(); + } + + /// Returns information about the image. + /// + /// \see_opencl_ref{clGetImageInfo} + template<class T> + T get_info(cl_image_info info) const + { + return detail::get_object_info<T>(clGetImageInfo, m_mem, info); + } + + /// \overload + template<int Enum> + typename detail::get_object_info_type<image3d, Enum>::type + get_info() const; + + /// Returns the supported 3D image formats for the context. + /// + /// \see_opencl_ref{clGetSupportedImageFormats} + static std::vector<image_format> + get_supported_formats(const context &context, cl_mem_flags flags = read_write) + { + return image_object::get_supported_formats(context, CL_MEM_OBJECT_IMAGE3D, flags); + } + + /// Returns \c true if \p format is a supported 3D image format for + /// \p context. + static bool is_supported_format(const image_format &format, + const context &context, + cl_mem_flags flags = read_write) + { + return image_object::is_supported_format( + format, context, CL_MEM_OBJECT_IMAGE3D, flags + ); + } + + /// Creates a new image with a copy of the data in \c *this. Uses \p queue + /// to perform the copy operation. 
+ image3d clone(command_queue &queue) const; +}; + +/// \internal_ define get_info() specializations for image3d +BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(image3d, + ((cl_image_format, CL_IMAGE_FORMAT)) + ((size_t, CL_IMAGE_ELEMENT_SIZE)) + ((size_t, CL_IMAGE_ROW_PITCH)) + ((size_t, CL_IMAGE_SLICE_PITCH)) + ((size_t, CL_IMAGE_WIDTH)) + ((size_t, CL_IMAGE_HEIGHT)) + ((size_t, CL_IMAGE_DEPTH)) +) + +namespace detail { + +// set_kernel_arg() specialization for image3d +template<> +struct set_kernel_arg<image3d> : public set_kernel_arg<image_object> { }; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +BOOST_COMPUTE_TYPE_NAME(boost::compute::image3d, image3d_t) + +#endif // BOOST_COMPUTE_IMAGE_IMAGE3D_HPP diff --git a/boost/compute/image/image_format.hpp b/boost/compute/image/image_format.hpp new file mode 100644 index 0000000000..a6ecf83ef6 --- /dev/null +++ b/boost/compute/image/image_format.hpp @@ -0,0 +1,135 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_IMAGE_IMAGE_FORMAT_HPP +#define BOOST_COMPUTE_IMAGE_IMAGE_FORMAT_HPP + +#include <boost/compute/cl.hpp> + +namespace boost { +namespace compute { + +/// \class image_format +/// \brief A OpenCL image format +/// +/// For example, to create a format for a 8-bit RGBA image: +/// \code +/// boost::compute::image_format rgba8(CL_RGBA, CL_UNSIGNED_INT8); +/// \endcode +/// +/// After being constructed, image_format objects are usually passed to the +/// constructor of the various image classes (e.g. 
\ref image2d, \ref image3d) +/// to create an image object on a compute device. +/// +/// Image formats supported by a context can be queried with the static +/// get_supported_formats() in each image class. For example: +/// \code +/// std::vector<image_format> formats = image2d::get_supported_formats(ctx); +/// \endcode +/// +/// \see image2d +class image_format +{ +public: + enum channel_order { + r = CL_R, + a = CL_A, + intensity = CL_INTENSITY, + luminance = CL_LUMINANCE, + rg = CL_RG, + ra = CL_RA, + rgb = CL_RGB, + rgba = CL_RGBA, + argb = CL_ARGB, + bgra = CL_BGRA + }; + + enum channel_data_type { + snorm_int8 = CL_SNORM_INT8, + snorm_int16 = CL_SNORM_INT16, + unorm_int8 = CL_UNORM_INT8, + unorm_int16 = CL_UNORM_INT16, + unorm_short_565 = CL_UNORM_SHORT_565, + unorm_short_555 = CL_UNORM_SHORT_555, + unorm_int_101010 = CL_UNORM_INT_101010, + signed_int8 = CL_SIGNED_INT8, + signed_int16 = CL_SIGNED_INT16, + signed_int32 = CL_SIGNED_INT32, + unsigned_int8 = CL_UNSIGNED_INT8, + unsigned_int16 = CL_UNSIGNED_INT16, + unsigned_int32 = CL_UNSIGNED_INT32, + float16 = CL_HALF_FLOAT, + float32 = CL_FLOAT + }; + + /// Creates a new image format object with \p order and \p type. + explicit image_format(cl_channel_order order, cl_channel_type type) + { + m_format.image_channel_order = order; + m_format.image_channel_data_type = type; + } + + /// Creates a new image format object from \p format. + explicit image_format(const cl_image_format &format) + { + m_format.image_channel_order = format.image_channel_order; + m_format.image_channel_data_type = format.image_channel_data_type; + } + + /// Creates a new image format object as a copy of \p other. + image_format(const image_format &other) + : m_format(other.m_format) + { + } + + /// Copies the format from \p other to \c *this. + image_format& operator=(const image_format &other) + { + if(this != &other){ + m_format = other.m_format; + } + + return *this; + } + + /// Destroys the image format object. 
+ ~image_format() + { + } + + /// Returns a pointer to the \c cl_image_format object. + const cl_image_format* get_format_ptr() const + { + return &m_format; + } + + /// Returns \c true if \c *this is the same as \p other. + bool operator==(const image_format &other) const + { + return m_format.image_channel_order == + other.m_format.image_channel_order && + m_format.image_channel_data_type == + other.m_format.image_channel_data_type; + } + + /// Returns \c true if \c *this is not the same as \p other. + bool operator!=(const image_format &other) const + { + return !(*this == other); + } + +private: + cl_image_format m_format; +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_IMAGE_IMAGE_FORMAT_HPP diff --git a/boost/compute/image/image_object.hpp b/boost/compute/image/image_object.hpp new file mode 100644 index 0000000000..451c68568f --- /dev/null +++ b/boost/compute/image/image_object.hpp @@ -0,0 +1,170 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_IMAGE_IMAGE_OBJECT_HPP +#define BOOST_COMPUTE_IMAGE_IMAGE_OBJECT_HPP + +#include <algorithm> +#include <vector> + +#include <boost/compute/config.hpp> +#include <boost/compute/memory_object.hpp> +#include <boost/compute/detail/get_object_info.hpp> +#include <boost/compute/image/image_format.hpp> + +namespace boost { +namespace compute { + +/// \class image_object +/// \brief Base-class for image objects. +/// +/// The image_object class is the base-class for image objects on compute +/// devices. 
+/// +/// \see image1d, image2d, image3d +class image_object : public memory_object +{ +public: + image_object() + : memory_object() + { + } + + explicit image_object(cl_mem mem, bool retain = true) + : memory_object(mem, retain) + { + } + + image_object(const image_object &other) + : memory_object(other) + { + } + + image_object& operator=(const image_object &other) + { + if(this != &other){ + memory_object::operator=(other); + } + + return *this; + } + + #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES + image_object(image_object&& other) BOOST_NOEXCEPT + : memory_object(std::move(other)) + { + } + + /// \internal_ + image_object& operator=(image_object&& other) BOOST_NOEXCEPT + { + memory_object::operator=(std::move(other)); + + return *this; + } + #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES + + /// Destroys the image object. + ~image_object() + { + } + + /// Returns information about the image object. + /// + /// \see_opencl_ref{clGetImageInfo} + template<class T> + T get_image_info(cl_mem_info info) const + { + return detail::get_object_info<T>(clGetImageInfo, m_mem, info); + } + + /// Returns the format for the image. + image_format format() const + { + return image_format(get_image_info<cl_image_format>(CL_IMAGE_FORMAT)); + } + + /// \internal_ (deprecated) + image_format get_format() const + { + return format(); + } + + /// Returns the width of the image. + size_t width() const + { + return get_image_info<size_t>(CL_IMAGE_WIDTH); + } + + /// Returns the height of the image. + /// + /// For 1D images, this function will return \c 1. + size_t height() const + { + return get_image_info<size_t>(CL_IMAGE_HEIGHT); + } + + /// Returns the depth of the image. + /// + /// For 1D and 2D images, this function will return \c 1. + size_t depth() const + { + return get_image_info<size_t>(CL_IMAGE_DEPTH); + } + + /// Returns the supported image formats for the \p type in \p context. 
+ /// + /// \see_opencl_ref{clGetSupportedImageFormats} + static std::vector<image_format> + get_supported_formats(const context &context, + cl_mem_object_type type, + cl_mem_flags flags = read_write) + { + cl_uint count = 0; + clGetSupportedImageFormats(context, flags, type, 0, 0, &count); + + std::vector<cl_image_format> cl_formats(count); + clGetSupportedImageFormats(context, flags, type, count, &cl_formats[0], 0); + + std::vector<image_format> formats; + formats.reserve(count); + + for(cl_uint i = 0; i < count; i++){ + formats.push_back(image_format(cl_formats[i])); + } + + return formats; + } + + /// Returns \c true if \p format is a supported image format for + /// \p type in \p context with \p flags. + static bool is_supported_format(const image_format &format, + const context &context, + cl_mem_object_type type, + cl_mem_flags flags = read_write) + { + const std::vector<image_format> formats = + get_supported_formats(context, type, flags); + + return std::find(formats.begin(), formats.end(), format) != formats.end(); + } +}; + +namespace detail { + +// set_kernel_arg() specialization for image_object +template<> +struct set_kernel_arg<image_object> : public set_kernel_arg<memory_object> { }; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_IMAGE_IMAGE_OBJECT_HPP diff --git a/boost/compute/image/image_sampler.hpp b/boost/compute/image/image_sampler.hpp new file mode 100644 index 0000000000..4f1bfe9b86 --- /dev/null +++ b/boost/compute/image/image_sampler.hpp @@ -0,0 +1,221 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_IMAGE_IMAGE_SAMPLER_HPP +#define BOOST_COMPUTE_IMAGE_IMAGE_SAMPLER_HPP + +#include <boost/throw_exception.hpp> + +#include <boost/compute/config.hpp> +#include <boost/compute/context.hpp> +#include <boost/compute/kernel.hpp> +#include <boost/compute/detail/get_object_info.hpp> +#include <boost/compute/detail/assert_cl_success.hpp> +#include <boost/compute/exception/opencl_error.hpp> +#include <boost/compute/type_traits/type_name.hpp> + +namespace boost { +namespace compute { + +/// \class image_sampler +/// \brief An OpenCL image sampler object +/// +/// \see image2d, image_format +class image_sampler +{ +public: + enum addressing_mode { + none = CL_ADDRESS_NONE, + clamp_to_edge = CL_ADDRESS_CLAMP_TO_EDGE, + clamp = CL_ADDRESS_CLAMP, + repeat = CL_ADDRESS_REPEAT + }; + + enum filter_mode { + nearest = CL_FILTER_NEAREST, + linear = CL_FILTER_LINEAR + }; + + image_sampler() + : m_sampler(0) + { + } + + image_sampler(const context &context, + bool normalized_coords, + cl_addressing_mode addressing_mode, + cl_filter_mode filter_mode) + { + cl_int error = 0; + + #ifdef CL_VERSION_2_0 + std::vector<cl_sampler_properties> sampler_properties; + sampler_properties.push_back(CL_SAMPLER_NORMALIZED_COORDS); + sampler_properties.push_back(cl_sampler_properties(normalized_coords)); + sampler_properties.push_back(CL_SAMPLER_ADDRESSING_MODE); + sampler_properties.push_back(cl_sampler_properties(addressing_mode)); + sampler_properties.push_back(CL_SAMPLER_FILTER_MODE); + sampler_properties.push_back(cl_sampler_properties(filter_mode)); + sampler_properties.push_back(cl_sampler_properties(0)); + + m_sampler = clCreateSamplerWithProperties( + context, &sampler_properties[0], &error + ); + #else + m_sampler = clCreateSampler( + context, normalized_coords, addressing_mode, filter_mode, &error + ); + #endif + + if(!m_sampler){ + BOOST_THROW_EXCEPTION(opencl_error(error)); + } + } + 
+ explicit image_sampler(cl_sampler sampler, bool retain = true) + : m_sampler(sampler) + { + if(m_sampler && retain){ + clRetainSampler(m_sampler); + } + } + + /// Creates a new image sampler object as a copy of \p other. + image_sampler(const image_sampler &other) + : m_sampler(other.m_sampler) + { + if(m_sampler){ + clRetainSampler(m_sampler); + } + } + + /// Copies the image sampler object from \p other to \c *this. + image_sampler& operator=(const image_sampler &other) + { + if(this != &other){ + if(m_sampler){ + clReleaseSampler(m_sampler); + } + + m_sampler = other.m_sampler; + + if(m_sampler){ + clRetainSampler(m_sampler); + } + } + + return *this; + } + + #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES + image_sampler(image_sampler&& other) BOOST_NOEXCEPT + : m_sampler(other.m_sampler) + { + other.m_sampler = 0; + } + + image_sampler& operator=(image_sampler&& other) BOOST_NOEXCEPT + { + if(m_sampler){ + clReleaseSampler(m_sampler); + } + + m_sampler = other.m_sampler; + other.m_sampler = 0; + + return *this; + } + #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES + + /// Destroys the image sampler object. + ~image_sampler() + { + if(m_sampler){ + BOOST_COMPUTE_ASSERT_CL_SUCCESS( + clReleaseSampler(m_sampler) + ); + } + } + + /// Returns the underlying \c cl_sampler object. + cl_sampler& get() const + { + return const_cast<cl_sampler &>(m_sampler); + } + + /// Returns the context for the image sampler object. + context get_context() const + { + return context(get_info<cl_context>(CL_SAMPLER_CONTEXT)); + } + + /// Returns information about the sampler. + /// + /// \see_opencl_ref{clGetSamplerInfo} + template<class T> + T get_info(cl_sampler_info info) const + { + return detail::get_object_info<T>(clGetSamplerInfo, m_sampler, info); + } + + /// \overload + template<int Enum> + typename detail::get_object_info_type<image_sampler, Enum>::type + get_info() const; + + /// Returns \c true if the sampler is the same at \p other. 
+ bool operator==(const image_sampler &other) const + { + return m_sampler == other.m_sampler; + } + + /// Returns \c true if the sampler is different from \p other. + bool operator!=(const image_sampler &other) const + { + return m_sampler != other.m_sampler; + } + + operator cl_sampler() const + { + return m_sampler; + } + +private: + cl_sampler m_sampler; +}; + +/// \internal_ define get_info() specializations for image_sampler +BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(image_sampler, + ((cl_uint, CL_SAMPLER_REFERENCE_COUNT)) + ((cl_context, CL_SAMPLER_CONTEXT)) + ((cl_addressing_mode, CL_SAMPLER_ADDRESSING_MODE)) + ((cl_filter_mode, CL_SAMPLER_FILTER_MODE)) + ((bool, CL_SAMPLER_NORMALIZED_COORDS)) +) + +namespace detail { + +// set_kernel_arg specialization for image samplers +template<> +struct set_kernel_arg<image_sampler> +{ + void operator()(kernel &kernel_, size_t index, const image_sampler &sampler) + { + kernel_.set_arg(index, sampler.get()); + } +}; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +BOOST_COMPUTE_TYPE_NAME(boost::compute::image_sampler, sampler_t) + +#endif // BOOST_COMPUTE_IMAGE_IMAGE_SAMPLER_HPP diff --git a/boost/compute/image2d.hpp b/boost/compute/image2d.hpp new file mode 100644 index 0000000000..68460813da --- /dev/null +++ b/boost/compute/image2d.hpp @@ -0,0 +1,12 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +// deprecated, use <boost/compute/image/image2d.hpp> instead +#include <boost/compute/image/image2d.hpp> diff --git a/boost/compute/image3d.hpp b/boost/compute/image3d.hpp new file mode 100644 index 0000000000..ab7467c4f3 --- /dev/null +++ b/boost/compute/image3d.hpp @@ -0,0 +1,12 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +// deprecated, use <boost/compute/image/image3d.hpp> instead +#include <boost/compute/image/image3d.hpp> diff --git a/boost/compute/image_format.hpp b/boost/compute/image_format.hpp new file mode 100644 index 0000000000..1ee50014aa --- /dev/null +++ b/boost/compute/image_format.hpp @@ -0,0 +1,12 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +// deprecated, use <boost/compute/image/image_format.hpp> instead +#include <boost/compute/image/image_format.hpp> diff --git a/boost/compute/image_sampler.hpp b/boost/compute/image_sampler.hpp new file mode 100644 index 0000000000..1cd11608b9 --- /dev/null +++ b/boost/compute/image_sampler.hpp @@ -0,0 +1,12 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +// deprecated, use <boost/compute/image/image_sampler.hpp> instead +#include <boost/compute/image/image_sampler.hpp> diff --git a/boost/compute/interop/eigen.hpp b/boost/compute/interop/eigen.hpp new file mode 100644 index 0000000000..f616911417 --- /dev/null +++ b/boost/compute/interop/eigen.hpp @@ -0,0 +1,16 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_EIGEN_HPP +#define BOOST_COMPUTE_INTEROP_EIGEN_HPP + +#include <boost/compute/interop/eigen/core.hpp> + +#endif // BOOST_COMPUTE_INTEROP_EIGEN_HPP diff --git a/boost/compute/interop/eigen/core.hpp b/boost/compute/interop/eigen/core.hpp new file mode 100644 index 0000000000..aa047efd32 --- /dev/null +++ b/boost/compute/interop/eigen/core.hpp @@ -0,0 +1,72 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_EIGEN_EIGEN_HPP +#define BOOST_COMPUTE_INTEROP_EIGEN_EIGEN_HPP + +#include <Eigen/Core> + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/copy_n.hpp> +#include <boost/compute/iterator/buffer_iterator.hpp> +#include <boost/compute/type_traits/type_name.hpp> + +namespace boost { +namespace compute { + +/// Copies \p matrix to \p buffer. +template<class Derived> +inline void eigen_copy_matrix_to_buffer(const Eigen::PlainObjectBase<Derived> &matrix, + buffer_iterator<typename Derived::Scalar> buffer, + command_queue &queue = system::default_queue()) +{ + ::boost::compute::copy_n(matrix.data(), matrix.size(), buffer, queue); +} + +/// Copies \p buffer to \p matrix. 
+template<class Derived> +inline void eigen_copy_buffer_to_matrix(const buffer_iterator<typename Derived::Scalar> buffer, + Eigen::PlainObjectBase<Derived> &matrix, + command_queue &queue = system::default_queue()) +{ + ::boost::compute::copy_n(buffer, matrix.size(), matrix.data(), queue); +} + +/// Converts an \c Eigen::Matrix4f to a \c float16_. +inline float16_ eigen_matrix4f_to_float16(const Eigen::Matrix4f &matrix) +{ + float16_ result; + std::memcpy(&result, matrix.data(), 16 * sizeof(float)); + return result; +} + +/// Converts an \c Eigen::Matrix4d to a \c double16_. +inline double16_ eigen_matrix4d_to_double16(const Eigen::Matrix4d &matrix) +{ + double16_ result; + std::memcpy(&result, matrix.data(), 16 * sizeof(double)); + return result; +} + +} // end compute namespace +} // end boost namespace + +BOOST_COMPUTE_TYPE_NAME(Eigen::Vector2i, int2) +BOOST_COMPUTE_TYPE_NAME(Eigen::Vector4i, int4) +BOOST_COMPUTE_TYPE_NAME(Eigen::Vector2f, float2) +BOOST_COMPUTE_TYPE_NAME(Eigen::Vector4f, float4) +BOOST_COMPUTE_TYPE_NAME(Eigen::Matrix2f, float8) +BOOST_COMPUTE_TYPE_NAME(Eigen::Matrix4f, float16) +BOOST_COMPUTE_TYPE_NAME(Eigen::Vector2d, double2) +BOOST_COMPUTE_TYPE_NAME(Eigen::Vector4d, double4) +BOOST_COMPUTE_TYPE_NAME(Eigen::Matrix2d, double8) +BOOST_COMPUTE_TYPE_NAME(Eigen::Matrix4d, double16) + +#endif // BOOST_COMPUTE_INTEROP_EIGEN_EIGEN_HPP diff --git a/boost/compute/interop/opencv.hpp b/boost/compute/interop/opencv.hpp new file mode 100644 index 0000000000..8cfa3a1c8a --- /dev/null +++ b/boost/compute/interop/opencv.hpp @@ -0,0 +1,17 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_OPENCV_HPP +#define BOOST_COMPUTE_INTEROP_OPENCV_HPP + +#include <boost/compute/interop/opencv/core.hpp> +#include <boost/compute/interop/opencv/highgui.hpp> + +#endif // BOOST_COMPUTE_INTEROP_OPENCV_HPP diff --git a/boost/compute/interop/opencv/core.hpp b/boost/compute/interop/opencv/core.hpp new file mode 100644 index 0000000000..e3c7bf6dda --- /dev/null +++ b/boost/compute/interop/opencv/core.hpp @@ -0,0 +1,141 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_OPENCV_CORE_HPP +#define BOOST_COMPUTE_INTEROP_OPENCV_CORE_HPP + +#include <opencv2/core/core.hpp> + +#include <boost/throw_exception.hpp> + +#include <boost/compute/algorithm/copy_n.hpp> +#include <boost/compute/exception/opencl_error.hpp> +#include <boost/compute/image/image2d.hpp> +#include <boost/compute/image/image_format.hpp> +#include <boost/compute/iterator/buffer_iterator.hpp> + +namespace boost { +namespace compute { + +template<class T> +inline void opencv_copy_mat_to_buffer(const cv::Mat &mat, + buffer_iterator<T> buffer, + command_queue &queue = system::default_queue()) +{ + BOOST_ASSERT(mat.isContinuous()); + + ::boost::compute::copy_n( + reinterpret_cast<T *>(mat.data), mat.rows * mat.cols, buffer, queue + ); +} + +template<class T> +inline void opencv_copy_buffer_to_mat(const buffer_iterator<T> buffer, + cv::Mat &mat, + command_queue &queue = system::default_queue()) +{ + BOOST_ASSERT(mat.isContinuous()); + + ::boost::compute::copy_n( + buffer, 
mat.cols * mat.rows, reinterpret_cast<T *>(mat.data), queue + ); +} + +inline void opencv_copy_mat_to_image(const cv::Mat &mat, + image2d &image, + command_queue &queue = system::default_queue()) +{ + BOOST_ASSERT(mat.data != 0); + BOOST_ASSERT(mat.isContinuous()); + BOOST_ASSERT(image.get_context() == queue.get_context()); + + queue.enqueue_write_image(image, image.origin(), image.size(), mat.data); +} + +inline void opencv_copy_image_to_mat(const image2d &image, + cv::Mat &mat, + command_queue &queue = system::default_queue()) +{ + BOOST_ASSERT(mat.isContinuous()); + BOOST_ASSERT(image.get_context() == queue.get_context()); + + queue.enqueue_read_image(image, image.origin(), image.size(), mat.data); +} + +inline image_format opencv_get_mat_image_format(const cv::Mat &mat) +{ + switch(mat.type()){ + case CV_8UC4: + return image_format(CL_BGRA, CL_UNORM_INT8); + case CV_16UC4: + return image_format(CL_BGRA, CL_UNORM_INT16); + case CV_32F: + return image_format(CL_INTENSITY, CL_FLOAT); + case CV_32FC4: + return image_format(CL_RGBA, CL_FLOAT); + case CV_8UC1: + return image_format(CL_INTENSITY, CL_UNORM_INT8); + } + + BOOST_THROW_EXCEPTION(opencl_error(CL_IMAGE_FORMAT_NOT_SUPPORTED)); +} + +inline cv::Mat opencv_create_mat_with_image2d(const image2d &image, + command_queue &queue = system::default_queue()) +{ + BOOST_ASSERT(image.get_context() == queue.get_context()); + + cv::Mat mat; + image_format format = image.get_format(); + const cl_image_format *cl_image_format = format.get_format_ptr(); + + if(cl_image_format->image_channel_data_type == CL_UNORM_INT8 && + cl_image_format->image_channel_order == CL_BGRA) + { + mat = cv::Mat(image.height(), image.width(), CV_8UC4); + } + else if(cl_image_format->image_channel_data_type == CL_UNORM_INT16 && + cl_image_format->image_channel_order == CL_BGRA) + { + mat = cv::Mat(image.height(), image.width(), CV_16UC4); + } + else if(cl_image_format->image_channel_data_type == CL_FLOAT && + cl_image_format->image_channel_order == 
CL_INTENSITY) + { + mat = cv::Mat(image.height(), image.width(), CV_32FC1); + } + else + { + mat = cv::Mat(image.height(), image.width(), CV_8UC1); + } + + opencv_copy_image_to_mat(image, mat, queue); + + return mat; +} + +inline image2d opencv_create_image2d_with_mat(const cv::Mat &mat, + cl_mem_flags flags, + command_queue &queue = system::default_queue()) +{ + const context &context = queue.get_context(); + const image_format format = opencv_get_mat_image_format(mat); + + image2d image(context, mat.cols, mat.rows, format, flags); + + opencv_copy_mat_to_image(mat, image, queue); + + return image; +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_INTEROP_OPENCV_CORE_HPP diff --git a/boost/compute/interop/opencv/highgui.hpp b/boost/compute/interop/opencv/highgui.hpp new file mode 100644 index 0000000000..66baa728f6 --- /dev/null +++ b/boost/compute/interop/opencv/highgui.hpp @@ -0,0 +1,33 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_OPENCV_HIGHGUI_HPP +#define BOOST_COMPUTE_INTEROP_OPENCV_HIGHGUI_HPP + +#include <opencv2/highgui/highgui.hpp> + +#include <boost/compute/interop/opencv/core.hpp> + +namespace boost { +namespace compute { + +inline void opencv_imshow(const std::string &winname, + const image2d &image, + command_queue &queue = system::default_queue()) +{ + const cv::Mat mat = opencv_create_mat_with_image2d(image, queue); + + cv::imshow(winname, mat); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_INTEROP_OPENCV_HIGHGUI_HPP diff --git a/boost/compute/interop/opencv/ocl.hpp b/boost/compute/interop/opencv/ocl.hpp new file mode 100644 index 0000000000..23d8ac2273 --- /dev/null +++ b/boost/compute/interop/opencv/ocl.hpp @@ -0,0 +1,51 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_OPENCV_OCL_HPP +#define BOOST_COMPUTE_INTEROP_OPENCV_OCL_HPP + +#include <opencv2/ocl/ocl.hpp> + +#include <boost/compute/buffer.hpp> +#include <boost/compute/context.hpp> +#include <boost/compute/command_queue.hpp> + +namespace boost { +namespace compute { + +context opencv_ocl_get_context() +{ + void *ocl_context = cv::ocl::getoclContext(); + if(!ocl_context){ + return context(); + } + + return context(*(static_cast<cl_context *>(ocl_context))); +} + +command_queue opencv_ocl_get_command_queue() +{ + void *ocl_queue = cv::ocl::getoclCommandQueue(); + if(!ocl_queue){ + return command_queue(); + } + + return command_queue(*(static_cast<cl_command_queue *>(ocl_queue))); +} + +buffer opencv_ocl_get_buffer(const cv::ocl::oclMat &mat) +{ + return buffer(reinterpret_cast<cl_mem>(mat.data)); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_INTEROP_OPENCV_OCL_HPP diff --git a/boost/compute/interop/opengl.hpp b/boost/compute/interop/opengl.hpp new file mode 100644 index 0000000000..7ae12617ac --- /dev/null +++ b/boost/compute/interop/opengl.hpp @@ -0,0 +1,24 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_OPENGL_HPP +#define BOOST_COMPUTE_INTEROP_OPENGL_HPP + +/// \file +/// +/// Meta-header to include all Boost.Compute OpenGL interop headers. 
+ +#include <boost/compute/interop/opengl/acquire.hpp> +#include <boost/compute/interop/opengl/context.hpp> +#include <boost/compute/interop/opengl/opengl_buffer.hpp> +#include <boost/compute/interop/opengl/opengl_renderbuffer.hpp> +#include <boost/compute/interop/opengl/opengl_texture.hpp> + +#endif // BOOST_COMPUTE_INTEROP_OPENGL_HPP diff --git a/boost/compute/interop/opengl/acquire.hpp b/boost/compute/interop/opengl/acquire.hpp new file mode 100644 index 0000000000..10af4338fb --- /dev/null +++ b/boost/compute/interop/opengl/acquire.hpp @@ -0,0 +1,99 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_OPENGL_ACQUIRE_HPP +#define BOOST_COMPUTE_INTEROP_OPENGL_ACQUIRE_HPP + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/interop/opengl/cl_gl.hpp> +#include <boost/compute/interop/opengl/opengl_buffer.hpp> +#include <boost/compute/utility/wait_list.hpp> + +namespace boost { +namespace compute { + +/// Enqueues a command to acquire the specified OpenGL memory objects. 
+/// +/// \see_opencl_ref{clEnqueueAcquireGLObjects} +inline event opengl_enqueue_acquire_gl_objects(size_t num_objects, + const cl_mem *mem_objects, + command_queue &queue, + const wait_list &events = wait_list()) +{ + BOOST_ASSERT(queue != 0); + + event event_; + + cl_int ret = clEnqueueAcquireGLObjects(queue.get(), + num_objects, + mem_objects, + events.size(), + events.get_event_ptr(), + &event_.get()); + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; +} + +/// Enqueues a command to release the specified OpenGL memory objects. +/// +/// \see_opencl_ref{clEnqueueReleaseGLObjects} +inline event opengl_enqueue_release_gl_objects(size_t num_objects, + const cl_mem *mem_objects, + command_queue &queue, + const wait_list &events = wait_list()) +{ + BOOST_ASSERT(queue != 0); + + event event_; + + cl_int ret = clEnqueueReleaseGLObjects(queue.get(), + num_objects, + mem_objects, + events.size(), + events.get_event_ptr(), + &event_.get()); + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return event_; +} + +/// Enqueues a command to acquire the specified OpenGL buffer. +/// +/// \see_opencl_ref{clEnqueueAcquireGLObjects} +inline event opengl_enqueue_acquire_buffer(const opengl_buffer &buffer, + command_queue &queue, + const wait_list &events = wait_list()) +{ + BOOST_ASSERT(buffer.get_context() == queue.get_context()); + + return opengl_enqueue_acquire_gl_objects(1, &buffer.get(), queue, events); +} + +/// Enqueues a command to release the specified OpenGL buffer. 
+/// +/// \see_opencl_ref{clEnqueueReleaseGLObjects} +inline event opengl_enqueue_release_buffer(const opengl_buffer &buffer, + command_queue &queue, + const wait_list &events = wait_list()) +{ + BOOST_ASSERT(buffer.get_context() == queue.get_context()); + + return opengl_enqueue_release_gl_objects(1, &buffer.get(), queue, events); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_INTEROP_OPENGL_ACQUIRE_HPP diff --git a/boost/compute/interop/opengl/cl_gl.hpp b/boost/compute/interop/opengl/cl_gl.hpp new file mode 100644 index 0000000000..de82dbd9ab --- /dev/null +++ b/boost/compute/interop/opengl/cl_gl.hpp @@ -0,0 +1,20 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_OPENGL_CL_GL_HPP +#define BOOST_COMPUTE_INTEROP_OPENGL_CL_GL_HPP + +#if defined(__APPLE__) +#include <OpenCL/cl_gl.h> +#else +#include <CL/cl_gl.h> +#endif + +#endif // BOOST_COMPUTE_INTEROP_OPENGL_CL_GL_HPP diff --git a/boost/compute/interop/opengl/cl_gl_ext.hpp b/boost/compute/interop/opengl/cl_gl_ext.hpp new file mode 100644 index 0000000000..3392b051e0 --- /dev/null +++ b/boost/compute/interop/opengl/cl_gl_ext.hpp @@ -0,0 +1,20 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_OPENGL_CL_GL_EXT_HPP +#define BOOST_COMPUTE_INTEROP_OPENGL_CL_GL_EXT_HPP + +#if defined(__APPLE__) +#include <OpenCL/cl_gl_ext.h> +#else +#include <CL/cl_gl_ext.h> +#endif + +#endif // BOOST_COMPUTE_INTEROP_OPENGL_CL_GL_EXT_HPP diff --git a/boost/compute/interop/opengl/context.hpp b/boost/compute/interop/opengl/context.hpp new file mode 100644 index 0000000000..754dca2236 --- /dev/null +++ b/boost/compute/interop/opengl/context.hpp @@ -0,0 +1,135 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_OPENGL_CONTEXT_HPP +#define BOOST_COMPUTE_INTEROP_OPENGL_CONTEXT_HPP + +#include <boost/throw_exception.hpp> + +#include <boost/compute/device.hpp> +#include <boost/compute/system.hpp> +#include <boost/compute/context.hpp> +#include <boost/compute/exception/unsupported_extension_error.hpp> +#include <boost/compute/interop/opengl/cl_gl.hpp> + +#ifdef __APPLE__ +#include <OpenCL/cl_gl_ext.h> +#include <OpenGL/OpenGL.h> +#endif + +#ifdef __linux__ +#include <GL/glx.h> +#endif + +namespace boost { +namespace compute { + +/// Creates a shared OpenCL/OpenGL context for the currently active +/// OpenGL context. +/// +/// Once created, the shared context can be used to create OpenCL memory +/// objects which can interact with OpenGL memory objects (e.g. VBOs). +/// +/// \throws unsupported_extension_error if no CL-GL sharing capable devices +/// are found. 
+inline context opengl_create_shared_context() +{ + // name of the OpenGL sharing extension for the system +#if defined(__APPLE__) + const char *cl_gl_sharing_extension = "cl_APPLE_gl_sharing"; +#else + const char *cl_gl_sharing_extension = "cl_khr_gl_sharing"; +#endif + +#if defined(__APPLE__) + // get OpenGL share group + CGLContextObj cgl_current_context = CGLGetCurrentContext(); + CGLShareGroupObj cgl_share_group = CGLGetShareGroup(cgl_current_context); + + cl_context_properties properties[] = { + CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, + (cl_context_properties) cgl_share_group, + 0 + }; + + cl_int error = 0; + cl_context cl_gl_context = clCreateContext(properties, 0, 0, 0, 0, &error); + if(!cl_gl_context){ + BOOST_THROW_EXCEPTION(opencl_error(error)); + } + + return context(cl_gl_context, false); +#else + typedef cl_int(*GetGLContextInfoKHRFunction)( + const cl_context_properties*, cl_gl_context_info, size_t, void *, size_t * + ); + + std::vector<platform> platforms = system::platforms(); + for(size_t i = 0; i < platforms.size(); i++){ + const platform &platform = platforms[i]; + + // load clGetGLContextInfoKHR() extension function + GetGLContextInfoKHRFunction GetGLContextInfoKHR = + reinterpret_cast<GetGLContextInfoKHRFunction>( + reinterpret_cast<unsigned long>( + platform.get_extension_function_address("clGetGLContextInfoKHR") + ) + ); + if(!GetGLContextInfoKHR){ + continue; + } + + // create context properties listing the platform and current OpenGL display + cl_context_properties properties[] = { + CL_CONTEXT_PLATFORM, (cl_context_properties) platform.id(), + #if defined(__linux__) + CL_GL_CONTEXT_KHR, (cl_context_properties) glXGetCurrentContext(), + CL_GLX_DISPLAY_KHR, (cl_context_properties) glXGetCurrentDisplay(), + #elif defined(WIN32) + CL_GL_CONTEXT_KHR, (cl_context_properties) wglGetCurrentContext(), + CL_WGL_HDC_KHR, (cl_context_properties) wglGetCurrentDC(), + #endif + 0 + }; + + // lookup current OpenCL device for current OpenGL context 
+ cl_device_id gpu_id; + cl_int ret = GetGLContextInfoKHR( + properties, + CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR, + sizeof(cl_device_id), + &gpu_id, + 0 + ); + if(ret != CL_SUCCESS){ + continue; + } + + // create device object for the GPU and ensure it supports CL-GL sharing + device gpu(gpu_id, false); + if(!gpu.supports_extension(cl_gl_sharing_extension)){ + continue; + } + + // return CL-GL sharing context + return context(gpu, properties); + } +#endif + + // no CL-GL sharing capable devices found + BOOST_THROW_EXCEPTION( + unsupported_extension_error(cl_gl_sharing_extension) + ); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_INTEROP_OPENGL_CONTEXT_HPP diff --git a/boost/compute/interop/opengl/gl.hpp b/boost/compute/interop/opengl/gl.hpp new file mode 100644 index 0000000000..a05c944075 --- /dev/null +++ b/boost/compute/interop/opengl/gl.hpp @@ -0,0 +1,20 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_OPENGL_GL_HPP +#define BOOST_COMPUTE_INTEROP_OPENGL_GL_HPP + +#if defined(__APPLE__) +#include <OpenGL/gl.h> +#else +#include <GL/gl.h> +#endif + +#endif // BOOST_COMPUTE_INTEROP_OPENGL_GL_HPP diff --git a/boost/compute/interop/opengl/opengl_buffer.hpp b/boost/compute/interop/opengl/opengl_buffer.hpp new file mode 100644 index 0000000000..c27347d0d9 --- /dev/null +++ b/boost/compute/interop/opengl/opengl_buffer.hpp @@ -0,0 +1,106 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_BUFFER_HPP +#define BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_BUFFER_HPP + +#include <boost/compute/buffer.hpp> +#include <boost/compute/interop/opengl/gl.hpp> +#include <boost/compute/interop/opengl/cl_gl.hpp> + +namespace boost { +namespace compute { + +/// \class opengl_buffer +/// +/// A OpenCL buffer for accessing an OpenGL memory object. +class opengl_buffer : public buffer +{ +public: + /// Creates a null OpenGL buffer object. + opengl_buffer() + : buffer() + { + } + + /// Creates a new OpenGL buffer object for \p mem. + explicit opengl_buffer(cl_mem mem, bool retain = true) + : buffer(mem, retain) + { + } + + /// Creates a new OpenGL buffer object in \p context for \p bufobj + /// with \p flags. 
+ /// + /// \see_opencl_ref{clCreateFromGLBuffer} + opengl_buffer(const context &context, + GLuint bufobj, + cl_mem_flags flags = read_write) + { + cl_int error = 0; + m_mem = clCreateFromGLBuffer(context, flags, bufobj, &error); + if(!m_mem){ + BOOST_THROW_EXCEPTION(opencl_error(error)); + } + } + + /// Creates a new OpenGL buffer object as a copy of \p other. + opengl_buffer(const opengl_buffer &other) + : buffer(other) + { + } + + /// Copies the OpenGL buffer object from \p other. + opengl_buffer& operator=(const opengl_buffer &other) + { + if(this != &other){ + buffer::operator=(other); + } + + return *this; + } + + /// Destroys the OpenGL buffer object. + ~opengl_buffer() + { + } + + /// Returns the OpenGL memory object ID. + /// + /// \see_opencl_ref{clGetGLObjectInfo} + GLuint get_opengl_object() const + { + GLuint object = 0; + clGetGLObjectInfo(m_mem, 0, &object); + return object; + } + + /// Returns the OpenGL memory object type. + /// + /// \see_opencl_ref{clGetGLObjectInfo} + cl_gl_object_type get_opengl_type() const + { + cl_gl_object_type type; + clGetGLObjectInfo(m_mem, &type, 0); + return type; + } +}; + +namespace detail { + +// set_kernel_arg specialization for opengl_buffer +template<> +struct set_kernel_arg<opengl_buffer> : set_kernel_arg<memory_object> { }; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_BUFFER_HPP diff --git a/boost/compute/interop/opengl/opengl_renderbuffer.hpp b/boost/compute/interop/opengl/opengl_renderbuffer.hpp new file mode 100644 index 0000000000..fd4759d0ee --- /dev/null +++ b/boost/compute/interop/opengl/opengl_renderbuffer.hpp @@ -0,0 +1,129 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// 
http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_RENDERBUFFER_HPP +#define BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_RENDERBUFFER_HPP + +#include <boost/compute/image/image_object.hpp> +#include <boost/compute/interop/opengl/gl.hpp> +#include <boost/compute/interop/opengl/cl_gl.hpp> +#include <boost/compute/type_traits/type_name.hpp> +#include <boost/compute/utility/extents.hpp> + +namespace boost { +namespace compute { + +/// \class opengl_renderbuffer +/// +/// A OpenCL buffer for accessing an OpenGL renderbuffer object. +class opengl_renderbuffer : public image_object +{ +public: + /// Creates a null OpenGL renderbuffer object. + opengl_renderbuffer() + : image_object() + { + } + + /// Creates a new OpenGL renderbuffer object for \p mem. + explicit opengl_renderbuffer(cl_mem mem, bool retain = true) + : image_object(mem, retain) + { + } + + /// Creates a new OpenGL renderbuffer object in \p context for + /// \p renderbuffer with \p flags. + /// + /// \see_opencl_ref{clCreateFromGLRenderbuffer} + opengl_renderbuffer(const context &context, + GLuint renderbuffer, + cl_mem_flags flags = read_write) + { + cl_int error = 0; + + m_mem = clCreateFromGLRenderbuffer( + context, flags, renderbuffer, &error + ); + + if(!m_mem){ + BOOST_THROW_EXCEPTION(opencl_error(error)); + } + } + + /// Creates a new OpenGL renderbuffer object as a copy of \p other. + opengl_renderbuffer(const opengl_renderbuffer &other) + : image_object(other) + { + } + + /// Copies the OpenGL renderbuffer object from \p other. + opengl_renderbuffer& operator=(const opengl_renderbuffer &other) + { + if(this != &other){ + image_object::operator=(other); + } + + return *this; + } + + /// Destroys the OpenGL buffer object. + ~opengl_renderbuffer() + { + } + + /// Returns the size (width, height) of the renderbuffer. 
+ extents<2> size() const + { + extents<2> size; + size[0] = get_image_info<size_t>(CL_IMAGE_WIDTH); + size[1] = get_image_info<size_t>(CL_IMAGE_HEIGHT); + return size; + } + + /// Returns the origin of the renderbuffer (\c 0, \c 0). + extents<2> origin() const + { + return extents<2>(); + } + + /// Returns the OpenGL memory object ID. + /// + /// \see_opencl_ref{clGetGLObjectInfo} + GLuint get_opengl_object() const + { + GLuint object = 0; + clGetGLObjectInfo(m_mem, 0, &object); + return object; + } + + /// Returns the OpenGL memory object type. + /// + /// \see_opencl_ref{clGetGLObjectInfo} + cl_gl_object_type get_opengl_type() const + { + cl_gl_object_type type; + clGetGLObjectInfo(m_mem, &type, 0); + return type; + } +}; + +namespace detail { + +// set_kernel_arg() specialization for opengl_renderbuffer +template<> +struct set_kernel_arg<opengl_renderbuffer> : public set_kernel_arg<image_object> { }; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +BOOST_COMPUTE_TYPE_NAME(boost::compute::opengl_renderbuffer, image2d_t) + +#endif // BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_RENDERBUFFER_HPP diff --git a/boost/compute/interop/opengl/opengl_texture.hpp b/boost/compute/interop/opengl/opengl_texture.hpp new file mode 100644 index 0000000000..c1f3f4f441 --- /dev/null +++ b/boost/compute/interop/opengl/opengl_texture.hpp @@ -0,0 +1,133 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_TEXTURE_HPP +#define BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_TEXTURE_HPP + +#include <boost/compute/image/image_object.hpp> +#include <boost/compute/interop/opengl/gl.hpp> +#include <boost/compute/interop/opengl/cl_gl.hpp> +#include <boost/compute/detail/get_object_info.hpp> +#include <boost/compute/type_traits/type_name.hpp> +#include <boost/compute/utility/extents.hpp> + +namespace boost { +namespace compute { + +/// \class opengl_texture +/// +/// A OpenCL image2d for accessing an OpenGL texture object. +class opengl_texture : public image_object +{ +public: + /// Creates a null OpenGL texture object. + opengl_texture() + : image_object() + { + } + + /// Creates a new OpenGL texture object for \p mem. + explicit opengl_texture(cl_mem mem, bool retain = true) + : image_object(mem, retain) + { + } + + /// Creates a new OpenGL texture object in \p context for \p texture + /// with \p flags. + /// + /// \see_opencl_ref{clCreateFromGLTexture} + opengl_texture(const context &context, + GLenum texture_target, + GLint miplevel, + GLuint texture, + cl_mem_flags flags = read_write) + { + cl_int error = 0; + + #ifdef CL_VERSION_1_2 + m_mem = clCreateFromGLTexture(context, + flags, + texture_target, + miplevel, + texture, + &error); + #else + m_mem = clCreateFromGLTexture2D(context, + flags, + texture_target, + miplevel, + texture, + &error); + #endif + + if(!m_mem){ + BOOST_THROW_EXCEPTION(opencl_error(error)); + } + } + + /// Creates a new OpenGL texture object as a copy of \p other. + opengl_texture(const opengl_texture &other) + : image_object(other) + { + } + + /// Copies the OpenGL texture object from \p other. + opengl_texture& operator=(const opengl_texture &other) + { + if(this != &other){ + image_object::operator=(other); + } + + return *this; + } + + /// Destroys the texture object. 
+ ~opengl_texture() + { + } + + /// Returns the size (width, height) of the texture. + extents<2> size() const + { + extents<2> size; + size[0] = get_image_info<size_t>(CL_IMAGE_WIDTH); + size[1] = get_image_info<size_t>(CL_IMAGE_HEIGHT); + return size; + } + + /// Returns the origin of the texture (\c 0, \c 0). + extents<2> origin() const + { + return extents<2>(); + } + + /// Returns information about the texture. + /// + /// \see_opencl_ref{clGetGLTextureInfo} + template<class T> + T get_texture_info(cl_gl_texture_info info) const + { + return detail::get_object_info<T>(clGetGLTextureInfo, m_mem, info); + } +}; + +namespace detail { + +// set_kernel_arg() specialization for opengl_texture +template<> +struct set_kernel_arg<opengl_texture> : public set_kernel_arg<image_object> { }; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +BOOST_COMPUTE_TYPE_NAME(boost::compute::opengl_texture, image2d_t) + +#endif // BOOST_COMPUTE_INTEROP_OPENGL_OPENGL_TEXTURE_HPP diff --git a/boost/compute/interop/qt.hpp b/boost/compute/interop/qt.hpp new file mode 100644 index 0000000000..f53691cc34 --- /dev/null +++ b/boost/compute/interop/qt.hpp @@ -0,0 +1,17 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_QT_HPP +#define BOOST_COMPUTE_INTEROP_QT_HPP + +#include <boost/compute/interop/qt/qtcore.hpp> +#include <boost/compute/interop/qt/qtgui.hpp> + +#endif // BOOST_COMPUTE_INTEROP_QT_HPP diff --git a/boost/compute/interop/qt/qimage.hpp b/boost/compute/interop/qt/qimage.hpp new file mode 100644 index 0000000000..faa6f98cc8 --- /dev/null +++ b/boost/compute/interop/qt/qimage.hpp @@ -0,0 +1,69 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_QT_QIMAGE_HPP +#define BOOST_COMPUTE_INTEROP_QT_QIMAGE_HPP + +#include <boost/throw_exception.hpp> + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/exception/opencl_error.hpp> +#include <boost/compute/image/image2d.hpp> +#include <boost/compute/image/image_format.hpp> +#include <boost/compute/utility/dim.hpp> + +#include <QImage> + +namespace boost { +namespace compute { + +inline image_format qt_qimage_format_to_image_format(const QImage::Format &format) +{ + if(format == QImage::Format_RGB32){ + return image_format(image_format::bgra, image_format::unorm_int8); + } + + BOOST_THROW_EXCEPTION(opencl_error(CL_IMAGE_FORMAT_NOT_SUPPORTED)); +} + +inline QImage::Format qt_image_format_to_qimage_format(const image_format &format) +{ + if(format == image_format(image_format::bgra, image_format::unorm_int8)){ + return QImage::Format_RGB32; + } + + return QImage::Format_Invalid; +} + +inline image_format qt_qimage_get_format(const QImage &image) +{ + return 
qt_qimage_format_to_image_format(image.format()); +} + +inline void qt_copy_qimage_to_image2d(const QImage &qimage, + image2d &image, + command_queue &queue) +{ + queue.enqueue_write_image(image, image.origin(), image.size(), qimage.constBits()); +} + +inline void qt_copy_image2d_to_qimage(const image2d &image, + QImage &qimage, + command_queue &queue) +{ + queue.enqueue_read_image( + image, dim(0, 0), dim(qimage.width(), qimage.height()), qimage.bits() + ); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_INTEROP_QT_QIMAGE_HPP diff --git a/boost/compute/interop/qt/qpoint.hpp b/boost/compute/interop/qt/qpoint.hpp new file mode 100644 index 0000000000..d867fc7a43 --- /dev/null +++ b/boost/compute/interop/qt/qpoint.hpp @@ -0,0 +1,20 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_QT_QPOINT_HPP +#define BOOST_COMPUTE_INTEROP_QT_QPOINT_HPP + +#include <QPoint> + +#include <boost/compute/type_traits/type_name.hpp> + +BOOST_COMPUTE_TYPE_NAME(QPoint, "int2") + +#endif // BOOST_COMPUTE_INTEROP_QT_QPOINT_HPP diff --git a/boost/compute/interop/qt/qpointf.hpp b/boost/compute/interop/qt/qpointf.hpp new file mode 100644 index 0000000000..fd7392a804 --- /dev/null +++ b/boost/compute/interop/qt/qpointf.hpp @@ -0,0 +1,20 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_QT_QPOINTF_HPP +#define BOOST_COMPUTE_INTEROP_QT_QPOINTF_HPP + +#include <QPointF> + +#include <boost/compute/type_traits/type_name.hpp> + +BOOST_COMPUTE_TYPE_NAME(QPointF, "float2") + +#endif // BOOST_COMPUTE_INTEROP_QT_QPOINTF_HPP diff --git a/boost/compute/interop/qt/qtcore.hpp b/boost/compute/interop/qt/qtcore.hpp new file mode 100644 index 0000000000..b8978180e0 --- /dev/null +++ b/boost/compute/interop/qt/qtcore.hpp @@ -0,0 +1,18 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_QT_QTCORE_HPP +#define BOOST_COMPUTE_INTEROP_QT_QTCORE_HPP + +#include <boost/compute/interop/qt/qpoint.hpp> +#include <boost/compute/interop/qt/qpointf.hpp> +#include <boost/compute/interop/qt/qvector.hpp> + +#endif // BOOST_COMPUTE_INTEROP_QT_QTCORE_HPP diff --git a/boost/compute/interop/qt/qtgui.hpp b/boost/compute/interop/qt/qtgui.hpp new file mode 100644 index 0000000000..f1078f48b9 --- /dev/null +++ b/boost/compute/interop/qt/qtgui.hpp @@ -0,0 +1,16 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_QT_QTGUI_HPP +#define BOOST_COMPUTE_INTEROP_QT_QTGUI_HPP + +#include <boost/compute/interop/qt/qimage.hpp> + +#endif // BOOST_COMPUTE_INTEROP_QT_QTGUI_HPP diff --git a/boost/compute/interop/qt/qvector.hpp b/boost/compute/interop/qt/qvector.hpp new file mode 100644 index 0000000000..3ac93d2aaa --- /dev/null +++ b/boost/compute/interop/qt/qvector.hpp @@ -0,0 +1,48 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_QT_QVECTOR_HPP +#define BOOST_COMPUTE_INTEROP_QT_QVECTOR_HPP + +#include <boost/compute/detail/is_contiguous_iterator.hpp> + +#include <QVector> + +namespace boost { +namespace compute { +namespace detail { + +template<class Iterator> +struct _is_contiguous_iterator< + Iterator, + typename boost::enable_if< + typename boost::is_same< + Iterator, + typename QVector<typename Iterator::value_type>::iterator + >::type + >::type +> : public boost::true_type {}; + +template<class Iterator> +struct _is_contiguous_iterator< + Iterator, + typename boost::enable_if< + typename boost::is_same< + Iterator, + typename QVector<typename Iterator::value_type>::const_iterator + >::type + >::type +> : public boost::true_type {}; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_INTEROP_QT_QVECTOR_HPP diff --git a/boost/compute/interop/vtk.hpp b/boost/compute/interop/vtk.hpp new file mode 100644 index 0000000000..3f866e5f8f --- /dev/null +++ b/boost/compute/interop/vtk.hpp @@ -0,0 +1,19 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_VTK_HPP +#define BOOST_COMPUTE_INTEROP_VTK_HPP + +#include <boost/compute/interop/vtk/bounds.hpp> +#include <boost/compute/interop/vtk/data_array.hpp> +#include <boost/compute/interop/vtk/matrix4x4.hpp> +#include <boost/compute/interop/vtk/points.hpp> + +#endif // BOOST_COMPUTE_INTEROP_VTK_HPP diff --git a/boost/compute/interop/vtk/bounds.hpp b/boost/compute/interop/vtk/bounds.hpp new file mode 100644 index 0000000000..360a9061f9 --- /dev/null +++ b/boost/compute/interop/vtk/bounds.hpp @@ -0,0 +1,59 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_VTK_BOUNDS_HPP +#define BOOST_COMPUTE_INTEROP_VTK_BOUNDS_HPP + +#include <vector> +#include <iterator> + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/copy_n.hpp> +#include <boost/compute/algorithm/reduce.hpp> +#include <boost/compute/container/array.hpp> + +namespace boost { +namespace compute { + +/// Calculates the bounds for the points in the range [\p first, \p last) and +/// stores the result in \p bounds. +/// +/// For example, this can be used to implement the GetBounds() method for a +/// vtkMapper subclass. 
+template<class PointIterator> +inline void vtk_compute_bounds(PointIterator first, + PointIterator last, + double bounds[6], + command_queue &queue = system::default_queue()) +{ + typedef typename std::iterator_traits<PointIterator>::value_type T; + + const context &context = queue.get_context(); + + // compute min and max point + array<T, 2> extrema(context); + reduce(first, last, extrema.begin() + 0, min<T>(), queue); + reduce(first, last, extrema.begin() + 1, max<T>(), queue); + + // copy results to host buffer + std::vector<T> buffer(2); + copy_n(extrema.begin(), 2, buffer.begin(), queue); + + // copy to vtk-style bounds + bounds[0] = buffer[0][0]; bounds[1] = buffer[1][0]; + bounds[2] = buffer[0][1]; bounds[3] = buffer[1][1]; + bounds[4] = buffer[0][2]; bounds[5] = buffer[1][2]; +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_INTEROP_VTK_BOUNDS_HPP diff --git a/boost/compute/interop/vtk/data_array.hpp b/boost/compute/interop/vtk/data_array.hpp new file mode 100644 index 0000000000..7b909b1a3f --- /dev/null +++ b/boost/compute/interop/vtk/data_array.hpp @@ -0,0 +1,65 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_VTK_DATA_ARRAY_HPP +#define BOOST_COMPUTE_INTEROP_VTK_DATA_ARRAY_HPP + +#include <vtkDataArray.h> +#include <vtkDataArrayTemplate.h> + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/algorithm/copy_n.hpp> +#include <boost/compute/iterator/buffer_iterator.hpp> + +namespace boost { +namespace compute { + +/// Copies the values in \p data to \p buffer. +template<class T> +inline void vtk_copy_data_array_to_buffer(const vtkDataArray *data, + buffer_iterator<T> buffer, + command_queue &queue = system::default_queue()); + +/// \internal_ +template<class T> +inline void vtk_copy_data_array_to_buffer(const vtkDataArrayTemplate<T> *data, + buffer_iterator<T> buffer, + command_queue &queue = system::default_queue()) +{ + vtkDataArrayTemplate<T> *data_ = const_cast<vtkDataArrayTemplate<T> *>(data); + const T *data_ptr = static_cast<const T *>(data_->GetVoidPointer(0)); + size_t data_size = data_->GetNumberOfComponents() * data_->GetNumberOfTuples(); + ::boost::compute::copy_n(data_ptr, data_size, buffer, queue); +} + +/// Copies the values in the range [\p first, \p last) to \p data. 
+template<class T> +inline void vtk_copy_buffer_to_data_array(buffer_iterator<T> first, + buffer_iterator<T> last, + vtkDataArray *data, + command_queue &queue = system::default_queue()); + +/// \internal_ +template<class T> +inline void vtk_copy_buffer_to_data_array(buffer_iterator<T> first, + buffer_iterator<T> last, + vtkDataArrayTemplate<T> *data, + command_queue &queue = system::default_queue()) +{ + T *data_ptr = static_cast<T *>(data->GetVoidPointer(0)); + ::boost::compute::copy(first, last, data_ptr, queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_INTEROP_VTK_DATA_ARRAY_HPP diff --git a/boost/compute/interop/vtk/matrix4x4.hpp b/boost/compute/interop/vtk/matrix4x4.hpp new file mode 100644 index 0000000000..550c49f19f --- /dev/null +++ b/boost/compute/interop/vtk/matrix4x4.hpp @@ -0,0 +1,46 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_VTK_MATRIX4X4_HPP +#define BOOST_COMPUTE_INTEROP_VTK_MATRIX4X4_HPP + +#include <vtkMatrix4x4.h> + +#include <boost/compute/types/fundamental.hpp> + +namespace boost { +namespace compute { + +/// Converts a \c vtkMatrix4x4 to a \c float16_. 
+inline float16_ vtk_matrix4x4_to_float16(const vtkMatrix4x4 *matrix) +{ + float16_ result; + + for(int i = 0; i < 4; i++){ + for(int j = 0; j < 4; j++){ + result[i*4+j] = matrix->GetElement(i, j); + } + } + + return result; +} + +/// Converts a \c vtkMatrix4x4 to a \c double16_; +inline double16_ vtk_matrix4x4_to_double16(const vtkMatrix4x4 *matrix) +{ + double16_ result; + std::memcpy(&result, matrix->Element, 16 * sizeof(double)); + return result; +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_INTEROP_VTK_MATRIX4X4_HPP diff --git a/boost/compute/interop/vtk/points.hpp b/boost/compute/interop/vtk/points.hpp new file mode 100644 index 0000000000..fefbbb9874 --- /dev/null +++ b/boost/compute/interop/vtk/points.hpp @@ -0,0 +1,55 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_INTEROP_VTK_POINTS_HPP +#define BOOST_COMPUTE_INTEROP_VTK_POINTS_HPP + +#include <vector> + +#include <vtkPoints.h> + +#include <boost/compute/system.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/iterator/buffer_iterator.hpp> + +namespace boost { +namespace compute { + +/// Copies \p points to \p buffer. +/// +/// For example, to copy from a \c vtkPoints object to a \c vector<float4_>: +/// \code +/// vtkPoints *points = ... 
+/// vector<float4_> vector(points->GetNumberOfPoints(), context); +/// vtk_copy_points_to_buffer(points, vector.begin(), queue); +/// \endcode +template<class PointType> +inline void vtk_copy_points_to_buffer(const vtkPoints *points, + buffer_iterator<PointType> buffer, + command_queue &queue = system::default_queue()) +{ + vtkPoints *points_ = const_cast<vtkPoints *>(points); + + // copy points to aligned buffer + std::vector<PointType> tmp(points_->GetNumberOfPoints()); + for(vtkIdType i = 0; i < points_->GetNumberOfPoints(); i++){ + double *p = points_->GetPoint(i); + tmp[i] = PointType(p[0], p[1], p[2], 1); + } + + // copy data to device + copy(tmp.begin(), tmp.end(), buffer, queue); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_INTEROP_VTK_POINTS_HPP diff --git a/boost/compute/iterator.hpp b/boost/compute/iterator.hpp new file mode 100644 index 0000000000..59442b1599 --- /dev/null +++ b/boost/compute/iterator.hpp @@ -0,0 +1,28 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ITERATOR_HPP +#define BOOST_COMPUTE_ITERATOR_HPP + +/// \file +/// +/// Meta-header to include all Boost.Compute iterator headers. 
+ +#include <boost/compute/iterator/buffer_iterator.hpp> +#include <boost/compute/iterator/constant_iterator.hpp> +#include <boost/compute/iterator/constant_buffer_iterator.hpp> +#include <boost/compute/iterator/counting_iterator.hpp> +#include <boost/compute/iterator/discard_iterator.hpp> +#include <boost/compute/iterator/function_input_iterator.hpp> +#include <boost/compute/iterator/permutation_iterator.hpp> +#include <boost/compute/iterator/transform_iterator.hpp> +#include <boost/compute/iterator/zip_iterator.hpp> + +#endif // BOOST_COMPUTE_ITERATOR_HPP diff --git a/boost/compute/iterator/buffer_iterator.hpp b/boost/compute/iterator/buffer_iterator.hpp new file mode 100644 index 0000000000..cd68058f64 --- /dev/null +++ b/boost/compute/iterator/buffer_iterator.hpp @@ -0,0 +1,280 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ITERATOR_BUFFER_ITERATOR_HPP +#define BOOST_COMPUTE_ITERATOR_BUFFER_ITERATOR_HPP + +#include <cstddef> +#include <iterator> + +#include <boost/config.hpp> +#include <boost/type_traits.hpp> +#include <boost/static_assert.hpp> +#include <boost/utility/enable_if.hpp> +#include <boost/iterator/iterator_facade.hpp> + +#include <boost/compute/buffer.hpp> +#include <boost/compute/detail/buffer_value.hpp> +#include <boost/compute/detail/is_buffer_iterator.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/detail/read_write_single_value.hpp> +#include <boost/compute/type_traits/is_device_iterator.hpp> + +namespace boost { +namespace compute { + +// forward declaration for buffer_iterator<T> +template<class T> class buffer_iterator; + +namespace detail { + +// helper class which defines the iterator_facade super-class +// type for buffer_iterator<T> +template<class T> +class buffer_iterator_base +{ +public: + typedef ::boost::iterator_facade< + ::boost::compute::buffer_iterator<T>, + T, + ::std::random_access_iterator_tag, + ::boost::compute::detail::buffer_value<T> + > type; +}; + +template<class T, class IndexExpr> +struct buffer_iterator_index_expr +{ + typedef T result_type; + + buffer_iterator_index_expr(const buffer &buffer, + size_t index, + const memory_object::address_space address_space, + const IndexExpr &expr) + : m_buffer(buffer), + m_index(index), + m_address_space(address_space), + m_expr(expr) + { + } + + operator T() const + { + BOOST_STATIC_ASSERT_MSG(boost::is_integral<IndexExpr>::value, + "Index expression must be integral"); + + return buffer_value<T>(m_buffer, size_t(m_expr) * sizeof(T)); + } + + const buffer &m_buffer; + size_t m_index; + memory_object::address_space m_address_space; + IndexExpr m_expr; +}; + +template<class T, class IndexExpr> +inline meta_kernel& operator<<(meta_kernel &kernel, + const 
buffer_iterator_index_expr<T, IndexExpr> &expr) +{ + if(expr.m_index == 0){ + return kernel << + kernel.get_buffer_identifier<T>(expr.m_buffer, expr.m_address_space) << + '[' << expr.m_expr << ']'; + } + else { + return kernel << + kernel.get_buffer_identifier<T>(expr.m_buffer, expr.m_address_space) << + '[' << uint_(expr.m_index) << "+(" << expr.m_expr << ")]"; + } +} + +} // end detail namespace + +/// \class buffer_iterator +/// \brief An iterator for values in a buffer. +/// +/// The buffer_iterator class iterates over values in a memory buffer on a +/// compute device. It is the most commonly used iterator in Boost.Compute +/// and is used by the \ref vector "vector<T>" and \ref array "array<T, N>" +/// container classes. +/// +/// Buffer iterators store a reference to a memory buffer along with an index +/// into that memory buffer. +/// +/// The buffer_iterator class allows for arbitrary OpenCL memory objects +/// (including those created outside of Boost.Compute) to be used with the +/// Boost.Compute algorithms (such as transform() and sort()). 
For example, +/// to reverse the contents of an OpenCL memory buffer containing a set of +/// integers: +/// +/// \snippet test/test_buffer_iterator.cpp reverse_external_buffer +/// +/// \see buffer, make_buffer_iterator() +template<class T> +class buffer_iterator : public detail::buffer_iterator_base<T>::type +{ +public: + typedef typename detail::buffer_iterator_base<T>::type super_type; + typedef typename super_type::reference reference; + typedef typename super_type::difference_type difference_type; + + buffer_iterator() + : m_index(0) + { + } + + buffer_iterator(const buffer &buffer, size_t index) + : m_buffer(buffer.get(), false), + m_index(index) + { + } + + buffer_iterator(const buffer_iterator<T> &other) + : m_buffer(other.m_buffer.get(), false), + m_index(other.m_index) + { + } + + buffer_iterator<T>& operator=(const buffer_iterator<T> &other) + { + if(this != &other){ + m_buffer.get() = other.m_buffer.get(); + m_index = other.m_index; + } + + return *this; + } + + ~buffer_iterator() + { + // set buffer to null so that its reference count will + // not be decremented when its destructor is called + m_buffer.get() = 0; + } + + const buffer& get_buffer() const + { + return m_buffer; + } + + size_t get_index() const + { + return m_index; + } + + T read(command_queue &queue) const + { + BOOST_ASSERT(m_buffer.get()); + BOOST_ASSERT(m_index < m_buffer.size() / sizeof(T)); + + return detail::read_single_value<T>(m_buffer, m_index, queue); + } + + void write(const T &value, command_queue &queue) + { + BOOST_ASSERT(m_buffer.get()); + BOOST_ASSERT(m_index < m_buffer.size() / sizeof(T)); + + detail::write_single_value<T>(value, m_buffer, m_index, queue); + } + + /// \internal_ + template<class Expr> + detail::buffer_iterator_index_expr<T, Expr> + operator[](const Expr &expr) const + { + BOOST_ASSERT(m_buffer.get()); + + return detail::buffer_iterator_index_expr<T, Expr>( + m_buffer, m_index, memory_object::global_memory, expr + ); + } + +private: + friend class 
::boost::iterator_core_access; + + /// \internal_ + reference dereference() const + { + return detail::buffer_value<T>(m_buffer, m_index * sizeof(T)); + } + + /// \internal_ + bool equal(const buffer_iterator<T> &other) const + { + return m_buffer.get() == other.m_buffer.get() && + m_index == other.m_index; + } + + /// \internal_ + void increment() + { + m_index++; + } + + /// \internal_ + void decrement() + { + m_index--; + } + + /// \internal_ + void advance(difference_type n) + { + m_index = static_cast<size_t>(static_cast<difference_type>(m_index) + n); + } + + /// \internal_ + difference_type distance_to(const buffer_iterator<T> &other) const + { + return static_cast<difference_type>(other.m_index - m_index); + } + +private: + const buffer m_buffer; + size_t m_index; +}; + +/// Creates a new \ref buffer_iterator for \p buffer at \p index. +/// +/// \param buffer the \ref buffer object +/// \param index the index in the buffer +/// +/// \return a \c buffer_iterator for \p buffer at \p index +template<class T> +inline buffer_iterator<T> +make_buffer_iterator(const buffer &buffer, size_t index = 0) +{ + return buffer_iterator<T>(buffer, index); +} + +/// \internal_ (is_device_iterator specialization for buffer_iterator) +template<class T> +struct is_device_iterator<buffer_iterator<T> > : boost::true_type {}; + +namespace detail { + +// is_buffer_iterator specialization for buffer_iterator +template<class Iterator> +struct is_buffer_iterator< + Iterator, + typename boost::enable_if< + boost::is_same< + buffer_iterator<typename Iterator::value_type>, + typename boost::remove_const<Iterator>::type + > + >::type +> : public boost::true_type {}; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ITERATOR_BUFFER_ITERATOR_HPP diff --git a/boost/compute/iterator/constant_buffer_iterator.hpp b/boost/compute/iterator/constant_buffer_iterator.hpp new file mode 100644 index 0000000000..ef9a2ac959 --- /dev/null +++ 
b/boost/compute/iterator/constant_buffer_iterator.hpp @@ -0,0 +1,209 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ITERATOR_CONSTANT_BUFFER_ITERATOR_HPP +#define BOOST_COMPUTE_ITERATOR_CONSTANT_BUFFER_ITERATOR_HPP + +#include <cstddef> +#include <iterator> + +#include <boost/iterator/iterator_facade.hpp> + +#include <boost/compute/buffer.hpp> +#include <boost/compute/iterator/buffer_iterator.hpp> +#include <boost/compute/type_traits/is_device_iterator.hpp> + +namespace boost { +namespace compute { + +// forward declaration for constant_buffer_iterator<T> +template<class T> class constant_buffer_iterator; + +namespace detail { + +// helper class which defines the iterator_facade super-class +// type for constant_buffer_iterator<T> +template<class T> +class constant_buffer_iterator_base +{ +public: + typedef ::boost::iterator_facade< + ::boost::compute::constant_buffer_iterator<T>, + T, + ::std::random_access_iterator_tag, + ::boost::compute::detail::buffer_value<T> + > type; +}; + +} // end detail namespace + +/// \class constant_buffer_iterator +/// \brief An iterator for a buffer in the \c constant memory space. +/// +/// The constant_buffer_iterator class provides an iterator for values in a +/// buffer in the \c constant memory space. +/// +/// For iterating over values in the \c global memory space (the most common +/// case), use the buffer_iterator class. 
+/// +/// \see buffer_iterator +template<class T> +class constant_buffer_iterator : + public detail::constant_buffer_iterator_base<T>::type +{ +public: + typedef typename detail::constant_buffer_iterator_base<T>::type super_type; + typedef typename super_type::reference reference; + typedef typename super_type::difference_type difference_type; + + constant_buffer_iterator() + : m_buffer(0), + m_index(0) + { + } + + constant_buffer_iterator(const buffer &buffer, size_t index) + : m_buffer(&buffer), + m_index(index) + { + } + + constant_buffer_iterator(const constant_buffer_iterator<T> &other) + : m_buffer(other.m_buffer), + m_index(other.m_index) + { + } + + constant_buffer_iterator<T>& operator=(const constant_buffer_iterator<T> &other) + { + if(this != &other){ + m_buffer = other.m_buffer; + m_index = other.m_index; + } + + return *this; + } + + ~constant_buffer_iterator() + { + } + + const buffer& get_buffer() const + { + return *m_buffer; + } + + size_t get_index() const + { + return m_index; + } + + T read(command_queue &queue) const + { + BOOST_ASSERT(m_buffer && m_buffer->get()); + BOOST_ASSERT(m_index < m_buffer->size() / sizeof(T)); + + return detail::read_single_value<T>(m_buffer, m_index, queue); + } + + void write(const T &value, command_queue &queue) + { + BOOST_ASSERT(m_buffer && m_buffer->get()); + BOOST_ASSERT(m_index < m_buffer->size() / sizeof(T)); + + detail::write_single_value<T>(m_buffer, m_index, queue); + } + + template<class Expr> + detail::buffer_iterator_index_expr<T, Expr> + operator[](const Expr &expr) const + { + BOOST_ASSERT(m_buffer); + BOOST_ASSERT(m_buffer->get()); + + return detail::buffer_iterator_index_expr<T, Expr>( + *m_buffer, m_index, memory_object::constant_memory, expr + ); + } + +private: + friend class ::boost::iterator_core_access; + + reference dereference() const + { + return detail::buffer_value<T>(*m_buffer, m_index); + } + + bool equal(const constant_buffer_iterator<T> &other) const + { + return m_buffer == 
other.m_buffer && m_index == other.m_index; + } + + void increment() + { + m_index++; + } + + void decrement() + { + m_index--; + } + + void advance(difference_type n) + { + m_index = static_cast<size_t>(static_cast<difference_type>(m_index) + n); + } + + difference_type distance_to(const constant_buffer_iterator<T> &other) const + { + return static_cast<difference_type>(other.m_index - m_index); + } + +private: + const buffer *m_buffer; + size_t m_index; +}; + +/// Creates a new constant_buffer_iterator for \p buffer at \p index. +/// +/// \param buffer the \ref buffer object +/// \param index the index in the buffer +/// +/// \return a \c constant_buffer_iterator for \p buffer at \p index +template<class T> +inline constant_buffer_iterator<T> +make_constant_buffer_iterator(const buffer &buffer, size_t index = 0) +{ + return constant_buffer_iterator<T>(buffer, index); +} + +/// \internal_ (is_device_iterator specialization for constant_buffer_iterator) +template<class T> +struct is_device_iterator<constant_buffer_iterator<T> > : boost::true_type {}; + +namespace detail { + +// is_buffer_iterator specialization for constant_buffer_iterator +template<class Iterator> +struct is_buffer_iterator< + Iterator, + typename boost::enable_if< + boost::is_same< + constant_buffer_iterator<typename Iterator::value_type>, + typename boost::remove_const<Iterator>::type + > + >::type +> : public boost::true_type {}; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ITERATOR_CONSTANT_BUFFER_ITERATOR_HPP diff --git a/boost/compute/iterator/constant_iterator.hpp b/boost/compute/iterator/constant_iterator.hpp new file mode 100644 index 0000000000..f0d45c02c0 --- /dev/null +++ b/boost/compute/iterator/constant_iterator.hpp @@ -0,0 +1,171 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software 
License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ITERATOR_CONSTANT_ITERATOR_HPP +#define BOOST_COMPUTE_ITERATOR_CONSTANT_ITERATOR_HPP + +#include <string> +#include <cstddef> +#include <iterator> + +#include <boost/config.hpp> +#include <boost/iterator/iterator_facade.hpp> + +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/type_traits/is_device_iterator.hpp> + +namespace boost { +namespace compute { + +// forward declaration for constant_iterator<T> +template<class T> class constant_iterator; + +namespace detail { + +// helper class which defines the iterator_facade super-class +// type for constant_iterator<T> +template<class T> +class constant_iterator_base +{ +public: + typedef ::boost::iterator_facade< + ::boost::compute::constant_iterator<T>, + T, + ::std::random_access_iterator_tag + > type; +}; + +} // end detail namespace + +/// \class constant_iterator +/// \brief An iterator with a constant value. +/// +/// The constant_iterator class provides an iterator which returns a constant +/// value when dereferenced. 
+/// +/// For example, this could be used to implement the fill() algorithm in terms +/// of the copy() algorithm by copying from a range of constant iterators: +/// +/// \snippet test/test_constant_iterator.cpp fill_with_copy +/// +/// \see make_constant_iterator() +template<class T> +class constant_iterator : public detail::constant_iterator_base<T>::type +{ +public: + typedef typename detail::constant_iterator_base<T>::type super_type; + typedef typename super_type::reference reference; + typedef typename super_type::difference_type difference_type; + + constant_iterator(const T &value, size_t index = 0) + : m_value(value), + m_index(index) + { + } + + constant_iterator(const constant_iterator<T> &other) + : m_value(other.m_value), + m_index(other.m_index) + { + } + + constant_iterator<T>& operator=(const constant_iterator<T> &other) + { + if(this != &other){ + m_value = other.m_value; + m_index = other.m_index; + } + + return *this; + } + + ~constant_iterator() + { + } + + size_t get_index() const + { + return m_index; + } + + /// \internal_ + template<class Expr> + detail::meta_kernel_literal<T> operator[](const Expr &expr) const + { + (void) expr; + + return detail::meta_kernel::make_lit<T>(m_value); + } + +private: + friend class ::boost::iterator_core_access; + + /// \internal_ + reference dereference() const + { + return m_value; + } + + /// \internal_ + bool equal(const constant_iterator<T> &other) const + { + return m_value == other.m_value && m_index == other.m_index; + } + + /// \internal_ + void increment() + { + m_index++; + } + + /// \internal_ + void decrement() + { + m_index--; + } + + /// \internal_ + void advance(difference_type n) + { + m_index = static_cast<size_t>(static_cast<difference_type>(m_index) + n); + } + + /// \internal_ + difference_type distance_to(const constant_iterator<T> &other) const + { + return static_cast<difference_type>(other.m_index - m_index); + } + +private: + T m_value; + size_t m_index; +}; + +/// Returns a new 
constant_iterator with \p value at \p index. +/// +/// \param value the constant value +/// \param index the iterators index +/// +/// \return a \c constant_iterator with \p value +template<class T> +inline constant_iterator<T> +make_constant_iterator(const T &value, size_t index = 0) +{ + return constant_iterator<T>(value, index); +} + +/// \internal_ (is_device_iterator specialization for constant_iterator) +template<class T> +struct is_device_iterator<constant_iterator<T> > : boost::true_type {}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ITERATOR_CONSTANT_ITERATOR_HPP diff --git a/boost/compute/iterator/counting_iterator.hpp b/boost/compute/iterator/counting_iterator.hpp new file mode 100644 index 0000000000..304c1e05cf --- /dev/null +++ b/boost/compute/iterator/counting_iterator.hpp @@ -0,0 +1,185 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ITERATOR_COUNTING_ITERATOR_HPP +#define BOOST_COMPUTE_ITERATOR_COUNTING_ITERATOR_HPP + +#include <string> +#include <cstddef> +#include <iterator> + +#include <boost/config.hpp> +#include <boost/iterator/iterator_facade.hpp> + +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/type_traits/is_device_iterator.hpp> + +namespace boost { +namespace compute { + +// forward declaration for counting_iterator<T> +template<class T> class counting_iterator; + +namespace detail { + +// helper class which defines the iterator_facade super-class +// type for counting_iterator<T> +template<class T> +class counting_iterator_base +{ +public: + typedef ::boost::iterator_facade< + ::boost::compute::counting_iterator<T>, + T, + ::std::random_access_iterator_tag + > type; +}; + +template<class T, class IndexExpr> +struct counting_iterator_index_expr +{ + typedef T result_type; + + counting_iterator_index_expr(const T &init, const IndexExpr &expr) + : m_init(init), + m_expr(expr) + { + } + + const T &m_init; + IndexExpr m_expr; +}; + +template<class T, class IndexExpr> +inline meta_kernel& operator<<(meta_kernel &kernel, + const counting_iterator_index_expr<T, IndexExpr> &expr) +{ + return kernel << '(' << expr.m_init << '+' << expr.m_expr << ')'; +} + +} // end detail namespace + +/// \class counting_iterator +/// \brief The counting_iterator class implements a counting iterator. +/// +/// A counting iterator returns an internal value (initialized with \p init) +/// which is incremented each time the iterator is incremented. 
+/// +/// For example, this could be used to implement the iota() algorithm in terms +/// of the copy() algorithm by copying from a range of counting iterators: +/// +/// \snippet test/test_counting_iterator.cpp iota_with_copy +/// +/// \see make_counting_iterator() +template<class T> +class counting_iterator : public detail::counting_iterator_base<T>::type +{ +public: + typedef typename detail::counting_iterator_base<T>::type super_type; + typedef typename super_type::reference reference; + typedef typename super_type::difference_type difference_type; + + counting_iterator(const T &init) + : m_init(init) + { + } + + counting_iterator(const counting_iterator<T> &other) + : m_init(other.m_init) + { + } + + counting_iterator<T>& operator=(const counting_iterator<T> &other) + { + if(this != &other){ + m_init = other.m_init; + } + + return *this; + } + + ~counting_iterator() + { + } + + size_t get_index() const + { + return 0; + } + + template<class Expr> + detail::counting_iterator_index_expr<T, Expr> + operator[](const Expr &expr) const + { + return detail::counting_iterator_index_expr<T, Expr>(m_init, expr); + } + +private: + friend class ::boost::iterator_core_access; + + reference dereference() const + { + return m_init; + } + + bool equal(const counting_iterator<T> &other) const + { + return m_init == other.m_init; + } + + void increment() + { + m_init++; + } + + void decrement() + { + m_init--; + } + + void advance(difference_type n) + { + m_init += static_cast<T>(n); + } + + difference_type distance_to(const counting_iterator<T> &other) const + { + return difference_type(other.m_init) - difference_type(m_init); + } + +private: + T m_init; +}; + +/// Returns a new counting_iterator starting at \p init. +/// +/// \param init the initial value +/// +/// \return a counting_iterator with \p init. 
+/// +/// For example, to create a counting iterator which returns unsigned integers +/// and increments from one: +/// \code +/// auto iter = make_counting_iterator<uint_>(1); +/// \endcode +template<class T> +inline counting_iterator<T> make_counting_iterator(const T &init) +{ + return counting_iterator<T>(init); +} + +/// \internal_ (is_device_iterator specialization for counting_iterator) +template<class T> +struct is_device_iterator<counting_iterator<T> > : boost::true_type {}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ITERATOR_COUNTING_ITERATOR_HPP diff --git a/boost/compute/iterator/detail/get_base_iterator_buffer.hpp b/boost/compute/iterator/detail/get_base_iterator_buffer.hpp new file mode 100644 index 0000000000..3d14355115 --- /dev/null +++ b/boost/compute/iterator/detail/get_base_iterator_buffer.hpp @@ -0,0 +1,52 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ITERATOR_DETAIL_GET_BASE_ITERATOR_BUFFER_HPP +#define BOOST_COMPUTE_ITERATOR_DETAIL_GET_BASE_ITERATOR_BUFFER_HPP + +namespace boost { +namespace compute { +namespace detail { + +// returns the buffer for an iterator adaptor's base iterator if +// it exists, otherwise returns a null buffer object. 
+template<class Iterator> +inline const buffer& +get_base_iterator_buffer(const Iterator &iter, + typename boost::enable_if< + is_buffer_iterator< + typename Iterator::base_type + > + >::type* = 0) +{ + return iter.base().get_buffer(); +} + +template<class Iterator> +inline const buffer& +get_base_iterator_buffer(const Iterator &iter, + typename boost::disable_if< + is_buffer_iterator< + typename Iterator::base_type + > + >::type* = 0) +{ + (void) iter; + + static buffer null_buffer; + + return null_buffer; +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ITERATOR_DETAIL_GET_BASE_ITERATOR_BUFFER_HPP diff --git a/boost/compute/iterator/detail/swizzle_iterator.hpp b/boost/compute/iterator/detail/swizzle_iterator.hpp new file mode 100644 index 0000000000..c7c3c45340 --- /dev/null +++ b/boost/compute/iterator/detail/swizzle_iterator.hpp @@ -0,0 +1,188 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ITERATOR_DETAIL_SWIZZLE_ITERATOR_HPP +#define BOOST_COMPUTE_ITERATOR_DETAIL_SWIZZLE_ITERATOR_HPP + +#include <string> +#include <cstddef> +#include <iterator> + +#include <boost/config.hpp> +#include <boost/iterator/iterator_adaptor.hpp> + +#include <boost/compute/functional.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/type_traits/make_vector_type.hpp> +#include <boost/compute/detail/is_buffer_iterator.hpp> +#include <boost/compute/detail/read_write_single_value.hpp> +#include <boost/compute/iterator/detail/get_base_iterator_buffer.hpp> +#include <boost/compute/type_traits/is_device_iterator.hpp> + +namespace boost { +namespace compute { +namespace detail { + +// forward declaration for swizzle_iterator +template<class InputIterator, size_t Size> +class swizzle_iterator; + +// meta-function returing the value_type for a swizzle_iterator +template<class InputIterator, size_t Size> +struct make_swizzle_iterator_value_type +{ + typedef + typename make_vector_type< + typename scalar_type< + typename std::iterator_traits<InputIterator>::value_type + >::type, + Size + >::type type; +}; + +// helper class which defines the iterator_adaptor super-class +// type for swizzle_iterator +template<class InputIterator, size_t Size> +class swizzle_iterator_base +{ +public: + typedef ::boost::iterator_adaptor< + swizzle_iterator<InputIterator, Size>, + InputIterator, + typename make_swizzle_iterator_value_type<InputIterator, Size>::type, + typename std::iterator_traits<InputIterator>::iterator_category, + typename make_swizzle_iterator_value_type<InputIterator, Size>::type + > type; +}; + +template<class InputIterator, size_t Size, class IndexExpr> +struct swizzle_iterator_index_expr +{ + typedef typename make_swizzle_iterator_value_type<InputIterator, Size>::type result_type; + + swizzle_iterator_index_expr(const InputIterator &input_iter, + 
const IndexExpr &index_expr, + const std::string &components) + : m_input_iter(input_iter), + m_index_expr(index_expr), + m_components(components) + { + } + + InputIterator m_input_iter; + IndexExpr m_index_expr; + std::string m_components; +}; + +template<class InputIterator, size_t Size, class IndexExpr> +inline meta_kernel& operator<<(meta_kernel &kernel, + const swizzle_iterator_index_expr<InputIterator, + Size, + IndexExpr> &expr) +{ + return kernel << expr.m_input_iter[expr.m_index_expr] + << "." << expr.m_components; +} + +template<class InputIterator, size_t Size> +class swizzle_iterator : + public swizzle_iterator_base<InputIterator, Size>::type +{ +public: + typedef typename + swizzle_iterator_base<InputIterator, Size>::type + super_type; + typedef typename super_type::value_type value_type; + typedef typename super_type::reference reference; + typedef typename super_type::base_type base_type; + typedef typename super_type::difference_type difference_type; + + BOOST_STATIC_CONSTANT(size_t, vector_size = Size); + + swizzle_iterator(InputIterator iterator, const std::string &components) + : super_type(iterator), + m_components(components) + { + BOOST_ASSERT(components.size() == Size); + } + + swizzle_iterator(const swizzle_iterator<InputIterator, Size> &other) + : super_type(other.base()), + m_components(other.m_components) + { + BOOST_ASSERT(m_components.size() == Size); + } + + swizzle_iterator<InputIterator, Size>& + operator=(const swizzle_iterator<InputIterator, Size> &other) + { + if(this != &other){ + super_type::operator=(other); + + m_components = other.m_components; + } + + return *this; + } + + ~swizzle_iterator() + { + } + + size_t get_index() const + { + return super_type::base().get_index(); + } + + const buffer& get_buffer() const + { + return get_base_iterator_buffer(*this); + } + + template<class IndexExpression> + swizzle_iterator_index_expr<InputIterator, Size, IndexExpression> + operator[](const IndexExpression &expr) const + { + return 
swizzle_iterator_index_expr<InputIterator, + Size, + IndexExpression>(super_type::base(), + expr, + m_components); + } + +private: + friend class ::boost::iterator_core_access; + + reference dereference() const + { + return reference(); + } + +private: + std::string m_components; +}; + +template<size_t Size, class InputIterator> +inline swizzle_iterator<InputIterator, Size> +make_swizzle_iterator(InputIterator iterator, const std::string &components) +{ + return swizzle_iterator<InputIterator, Size>(iterator, components); +} + +} // end detail namespace + +// is_device_iterator specialization for swizzle_iterator +template<size_t Size, class InputIterator> +struct is_device_iterator<detail::swizzle_iterator<InputIterator, Size> > : boost::true_type {}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ITERATOR_SWIZZLE_ITERATOR_HPP diff --git a/boost/compute/iterator/discard_iterator.hpp b/boost/compute/iterator/discard_iterator.hpp new file mode 100644 index 0000000000..e002cf2ac2 --- /dev/null +++ b/boost/compute/iterator/discard_iterator.hpp @@ -0,0 +1,170 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ITERATOR_DISCARD_ITERATOR_HPP +#define BOOST_COMPUTE_ITERATOR_DISCARD_ITERATOR_HPP + +#include <string> +#include <cstddef> +#include <iterator> + +#include <boost/config.hpp> +#include <boost/iterator/iterator_facade.hpp> + +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/type_traits/is_device_iterator.hpp> + +namespace boost { +namespace compute { + +// forward declaration for discard_iterator +class discard_iterator; + +namespace detail { + +// helper class which defines the iterator_facade super-class +// type for discard_iterator +struct discard_iterator_base +{ + typedef ::boost::iterator_facade< + ::boost::compute::discard_iterator, + void, + ::std::random_access_iterator_tag, + void * + > type; +}; + +template<class IndexExpr> +struct discard_iterator_index_expr +{ + typedef void result_type; + + discard_iterator_index_expr(const IndexExpr &expr) + : m_expr(expr) + { + } + + IndexExpr m_expr; +}; + +template<class IndexExpr> +inline meta_kernel& operator<<(meta_kernel &kernel, + const discard_iterator_index_expr<IndexExpr> &expr) +{ + (void) expr; + + return kernel; +} + +} // end detail namespace + +/// \class discard_iterator +/// \brief An iterator which discards all values written to it. 
+/// +/// \see make_discard_iterator(), constant_iterator +class discard_iterator : public detail::discard_iterator_base::type +{ +public: + typedef detail::discard_iterator_base::type super_type; + typedef super_type::reference reference; + typedef super_type::difference_type difference_type; + + discard_iterator(size_t index = 0) + : m_index(index) + { + } + + discard_iterator(const discard_iterator &other) + : m_index(other.m_index) + { + } + + discard_iterator& operator=(const discard_iterator &other) + { + if(this != &other){ + m_index = other.m_index; + } + + return *this; + } + + ~discard_iterator() + { + } + + /// \internal_ + template<class Expr> + detail::discard_iterator_index_expr<Expr> + operator[](const Expr &expr) const + { + return detail::discard_iterator_index_expr<Expr>(expr); + } + +private: + friend class ::boost::iterator_core_access; + + /// \internal_ + reference dereference() const + { + return 0; + } + + /// \internal_ + bool equal(const discard_iterator &other) const + { + return m_index == other.m_index; + } + + /// \internal_ + void increment() + { + m_index++; + } + + /// \internal_ + void decrement() + { + m_index--; + } + + /// \internal_ + void advance(difference_type n) + { + m_index = static_cast<size_t>(static_cast<difference_type>(m_index) + n); + } + + /// \internal_ + difference_type distance_to(const discard_iterator &other) const + { + return static_cast<difference_type>(other.m_index - m_index); + } + +private: + size_t m_index; +}; + +/// Returns a new discard_iterator with \p index. 
+/// +/// \param index the index of the iterator +/// +/// \return a \c discard_iterator at \p index +inline discard_iterator make_discard_iterator(size_t index = 0) +{ + return discard_iterator(index); +} + +/// internal_ (is_device_iterator specialization for discard_iterator) +template<> +struct is_device_iterator<discard_iterator> : boost::true_type {}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ITERATOR_DISCARD_ITERATOR_HPP diff --git a/boost/compute/iterator/function_input_iterator.hpp b/boost/compute/iterator/function_input_iterator.hpp new file mode 100644 index 0000000000..bd89b6c0fc --- /dev/null +++ b/boost/compute/iterator/function_input_iterator.hpp @@ -0,0 +1,186 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ITERATOR_FUNCTION_INPUT_ITERATOR_HPP +#define BOOST_COMPUTE_ITERATOR_FUNCTION_INPUT_ITERATOR_HPP + +#include <cstddef> +#include <iterator> + +#include <boost/config.hpp> +#include <boost/iterator/iterator_facade.hpp> + +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/type_traits/is_device_iterator.hpp> +#include <boost/compute/type_traits/result_of.hpp> + +namespace boost { +namespace compute { + +// forward declaration for function_input_iterator<Function> +template<class Function> class function_input_iterator; + +namespace detail { + +// helper class which defines the iterator_facade super-class +// type for function_input_iterator<Function> +template<class Function> +class function_input_iterator_base +{ +public: + typedef ::boost::iterator_facade< + ::boost::compute::function_input_iterator<Function>, + typename ::boost::compute::result_of<Function()>::type, + ::std::random_access_iterator_tag, + typename ::boost::compute::result_of<Function()>::type + > type; +}; + +template<class Function> +struct function_input_iterator_expr +{ + typedef typename ::boost::compute::result_of<Function()>::type result_type; + + function_input_iterator_expr(const Function &function) + : m_function(function) + { + } + + Function m_function; +}; + +template<class Function> +inline meta_kernel& operator<<(meta_kernel &kernel, + const function_input_iterator_expr<Function> &expr) +{ + return kernel << expr.m_function(); +} + +} // end detail namespace + +/// \class function_input_iterator +/// \brief Iterator which returns the result of a function when dereferenced +/// +/// For example: +/// +/// \snippet test/test_function_input_iterator.cpp generate_42 +/// +/// \see make_function_input_iterator() +template<class Function> +class function_input_iterator : + public detail::function_input_iterator_base<Function>::type +{ +public: + typedef typename 
detail::function_input_iterator_base<Function>::type super_type; + typedef typename super_type::reference reference; + typedef typename super_type::difference_type difference_type; + typedef Function function; + + function_input_iterator(const Function &function, size_t index = 0) + : m_function(function), + m_index(index) + { + } + + function_input_iterator(const function_input_iterator<Function> &other) + : m_function(other.m_function), + m_index(other.m_index) + { + } + + function_input_iterator<Function>& + operator=(const function_input_iterator<Function> &other) + { + if(this != &other){ + m_function = other.m_function; + m_index = other.m_index; + } + + return *this; + } + + ~function_input_iterator() + { + } + + size_t get_index() const + { + return m_index; + } + + template<class Expr> + detail::function_input_iterator_expr<Function> + operator[](const Expr &expr) const + { + (void) expr; + + return detail::function_input_iterator_expr<Function>(m_function); + } + +private: + friend class ::boost::iterator_core_access; + + reference dereference() const + { + return reference(); + } + + bool equal(const function_input_iterator<Function> &other) const + { + return m_function == other.m_function && m_index == other.m_index; + } + + void increment() + { + m_index++; + } + + void decrement() + { + m_index--; + } + + void advance(difference_type n) + { + m_index = static_cast<size_t>(static_cast<difference_type>(m_index) + n); + } + + difference_type + distance_to(const function_input_iterator<Function> &other) const + { + return static_cast<difference_type>(other.m_index - m_index); + } + +private: + Function m_function; + size_t m_index; +}; + +/// Returns a function_input_iterator with \p function. 
+/// +/// \param function function to execute when dereferenced +/// \param index index of the iterator +/// +/// \return a \c function_input_iterator with \p function +template<class Function> +inline function_input_iterator<Function> +make_function_input_iterator(const Function &function, size_t index = 0) +{ + return function_input_iterator<Function>(function, index); +} + +/// \internal_ (is_device_iterator specialization for function_input_iterator) +template<class Function> +struct is_device_iterator<function_input_iterator<Function> > : boost::true_type {}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ITERATOR_FUNCTION_INPUT_ITERATOR_HPP diff --git a/boost/compute/iterator/permutation_iterator.hpp b/boost/compute/iterator/permutation_iterator.hpp new file mode 100644 index 0000000000..8a7f97a402 --- /dev/null +++ b/boost/compute/iterator/permutation_iterator.hpp @@ -0,0 +1,192 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ITERATOR_PERMUTATION_ITERATOR_HPP +#define BOOST_COMPUTE_ITERATOR_PERMUTATION_ITERATOR_HPP + +#include <string> +#include <cstddef> +#include <iterator> + +#include <boost/config.hpp> +#include <boost/iterator/iterator_adaptor.hpp> + +#include <boost/compute/functional.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/detail/is_buffer_iterator.hpp> +#include <boost/compute/detail/read_write_single_value.hpp> +#include <boost/compute/iterator/detail/get_base_iterator_buffer.hpp> +#include <boost/compute/type_traits/is_device_iterator.hpp> + +namespace boost { +namespace compute { + +// forward declaration for transform_iterator +template<class ElementIterator, class IndexIterator> +class permutation_iterator; + +namespace detail { + +// helper class which defines the iterator_adaptor super-class +// type for permutation_iterator +template<class ElementIterator, class IndexIterator> +class permutation_iterator_base +{ +public: + typedef ::boost::iterator_adaptor< + ::boost::compute::permutation_iterator<ElementIterator, IndexIterator>, + ElementIterator + > type; +}; + +template<class ElementIterator, class IndexIterator, class IndexExpr> +struct permutation_iterator_access_expr +{ + typedef typename std::iterator_traits<ElementIterator>::value_type result_type; + + permutation_iterator_access_expr(const ElementIterator &e, + const IndexIterator &i, + const IndexExpr &expr) + : m_element_iter(e), + m_index_iter(i), + m_expr(expr) + { + } + + ElementIterator m_element_iter; + IndexIterator m_index_iter; + IndexExpr m_expr; +}; + +template<class ElementIterator, class IndexIterator, class IndexExpr> +inline meta_kernel& operator<<(meta_kernel &kernel, + const permutation_iterator_access_expr<ElementIterator, + IndexIterator, + IndexExpr> &expr) +{ + return kernel << expr.m_element_iter[expr.m_index_iter[expr.m_expr]]; +} + +} // end 
detail namespace + +/// \class permutation_iterator +/// \brief The permutation_iterator class provides a permuation iterator +/// +/// A permutation iterator iterates over a value range and an index range. When +/// dereferenced, it returns the value from the value range using the current +/// index from the index range. +/// +/// For example, to reverse a range using the copy() algorithm and a permutation +/// sequence: +/// +/// \snippet test/test_permutation_iterator.cpp reverse_range +/// +/// \see make_permutation_iterator() +template<class ElementIterator, class IndexIterator> +class permutation_iterator + : public detail::permutation_iterator_base<ElementIterator, + IndexIterator>::type +{ +public: + typedef typename + detail::permutation_iterator_base<ElementIterator, + IndexIterator>::type super_type; + typedef typename super_type::value_type value_type; + typedef typename super_type::reference reference; + typedef typename super_type::base_type base_type; + typedef typename super_type::difference_type difference_type; + typedef IndexIterator index_iterator; + + permutation_iterator(ElementIterator e, IndexIterator i) + : super_type(e), + m_map(i) + { + } + + permutation_iterator(const permutation_iterator<ElementIterator, + IndexIterator> &other) + : super_type(other), + m_map(other.m_map) + { + } + + permutation_iterator<ElementIterator, IndexIterator>& + operator=(const permutation_iterator<ElementIterator, + IndexIterator> &other) + { + if(this != &other){ + super_type::operator=(other); + m_map = other.m_map; + } + + return *this; + } + + ~permutation_iterator() + { + } + + size_t get_index() const + { + return super_type::base().get_index(); + } + + const buffer& get_buffer() const + { + return detail::get_base_iterator_buffer(*this); + } + + template<class IndexExpr> + detail::permutation_iterator_access_expr<ElementIterator, + IndexIterator, + IndexExpr> + operator[](const IndexExpr &expr) const + { + return 
detail::permutation_iterator_access_expr<ElementIterator, + IndexIterator, + IndexExpr>(super_type::base(), + m_map, + expr); + } + +private: + friend class ::boost::iterator_core_access; + + reference dereference() const + { + return reference(); + } + +private: + IndexIterator m_map; +}; + +/// Returns a permutation_iterator for \p e using indices from \p i. +/// +/// \param e the element range iterator +/// \param i the index range iterator +/// +/// \return a \c permutation_iterator for \p e using \p i +template<class ElementIterator, class IndexIterator> +inline permutation_iterator<ElementIterator, IndexIterator> +make_permutation_iterator(ElementIterator e, IndexIterator i) +{ + return permutation_iterator<ElementIterator, IndexIterator>(e, i); +} + +/// \internal_ (is_device_iterator specialization for permutation_iterator) +template<class ElementIterator, class IndexIterator> +struct is_device_iterator< + permutation_iterator<ElementIterator, IndexIterator> > : boost::true_type {}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ITERATOR_PERMUTATION_ITERATOR_HPP diff --git a/boost/compute/iterator/strided_iterator.hpp b/boost/compute/iterator/strided_iterator.hpp new file mode 100644 index 0000000000..52e7f07bd8 --- /dev/null +++ b/boost/compute/iterator/strided_iterator.hpp @@ -0,0 +1,296 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2015 Jakub Szuppe <j.szuppe@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ITERATOR_STRIDED_ITERATOR_HPP +#define BOOST_COMPUTE_ITERATOR_STRIDED_ITERATOR_HPP + +#include <cstddef> +#include <iterator> + +#include <boost/config.hpp> +#include <boost/iterator/iterator_adaptor.hpp> + +#include <boost/compute/functional.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/detail/is_buffer_iterator.hpp> +#include <boost/compute/detail/read_write_single_value.hpp> +#include <boost/compute/iterator/detail/get_base_iterator_buffer.hpp> +#include <boost/compute/type_traits/is_device_iterator.hpp> +#include <boost/compute/type_traits/result_of.hpp> + +namespace boost { +namespace compute { + +// forward declaration for strided_iterator +template<class Iterator> +class strided_iterator; + +namespace detail { + +// helper class which defines the iterator_adaptor super-class +// type for strided_iterator +template<class Iterator> +class strided_iterator_base +{ +public: + typedef ::boost::iterator_adaptor< + ::boost::compute::strided_iterator<Iterator>, + Iterator + > type; +}; + +// helper class for including stride value in index expression +template<class IndexExpr, class Stride> +struct stride_expr +{ + stride_expr(const IndexExpr &expr, const Stride &stride) + : m_index_expr(expr), + m_stride(stride) + { + } + + IndexExpr m_index_expr; + Stride m_stride; +}; + +template<class IndexExpr, class Stride> +inline stride_expr<IndexExpr, Stride> make_stride_expr(const IndexExpr &expr, + const Stride &stride) +{ + return stride_expr<IndexExpr, Stride>(expr, stride); +} + +template<class IndexExpr, class Stride> +inline meta_kernel& operator<<(meta_kernel &kernel, + const stride_expr<IndexExpr, Stride> &expr) +{ + // (expr.m_stride * (expr.m_index_expr)) + return kernel << "(" << static_cast<ulong_>(expr.m_stride) + << " * (" << expr.m_index_expr << "))"; +} + +template<class Iterator, class Stride, class IndexExpr> +struct 
strided_iterator_index_expr +{ + typedef typename std::iterator_traits<Iterator>::value_type result_type; + + strided_iterator_index_expr(const Iterator &input_iter, + const Stride &stride, + const IndexExpr &index_expr) + : m_input_iter(input_iter), + m_stride(stride), + m_index_expr(index_expr) + { + } + + Iterator m_input_iter; + const Stride& m_stride; + IndexExpr m_index_expr; +}; + +template<class Iterator, class Stride, class IndexExpr> +inline meta_kernel& operator<<(meta_kernel &kernel, + const strided_iterator_index_expr<Iterator, + Stride, + IndexExpr> &expr) +{ + return kernel << expr.m_input_iter[make_stride_expr(expr.m_index_expr, expr.m_stride)]; +} + +} // end detail namespace + +/// \class strided_iterator +/// \brief An iterator adaptor with adjustable iteration step. +/// +/// The strided iterator adaptor skips over multiple elements each time +/// it is incremented or decremented. +/// +/// \see buffer_iterator, make_strided_iterator(), make_strided_iterator_end() +template<class Iterator> +class strided_iterator : + public detail::strided_iterator_base<Iterator>::type +{ +public: + typedef typename + detail::strided_iterator_base<Iterator>::type super_type; + typedef typename super_type::value_type value_type; + typedef typename super_type::reference reference; + typedef typename super_type::base_type base_type; + typedef typename super_type::difference_type difference_type; + + strided_iterator(Iterator iterator, difference_type stride) + : super_type(iterator), + m_stride(static_cast<difference_type>(stride)) + { + // stride must be greater than zero + BOOST_ASSERT_MSG(stride > 0, "Stride must be greater than zero"); + } + + strided_iterator(const strided_iterator<Iterator> &other) + : super_type(other.base()), + m_stride(other.m_stride) + { + } + + strided_iterator<Iterator>& + operator=(const strided_iterator<Iterator> &other) + { + if(this != &other){ + super_type::operator=(other); + + m_stride = other.m_stride; + } + + return *this; + } 
+ + ~strided_iterator() + { + } + + size_t get_index() const + { + return super_type::base().get_index(); + } + + const buffer& get_buffer() const + { + return detail::get_base_iterator_buffer(*this); + } + + template<class IndexExpression> + detail::strided_iterator_index_expr<Iterator, difference_type, IndexExpression> + operator[](const IndexExpression &expr) const + { + typedef + typename detail::strided_iterator_index_expr<Iterator, + difference_type, + IndexExpression> + StridedIndexExprType; + return StridedIndexExprType(super_type::base(),m_stride, expr); + } + +private: + friend class ::boost::iterator_core_access; + + reference dereference() const + { + return reference(); + } + + bool equal(const strided_iterator<Iterator> &other) const + { + return (other.m_stride == m_stride) + && (other.base_reference() == this->base_reference()); + } + + void increment() + { + std::advance(super_type::base_reference(), m_stride); + } + + void decrement() + { + std::advance(super_type::base_reference(),-m_stride); + } + + void advance(typename super_type::difference_type n) + { + std::advance(super_type::base_reference(), n * m_stride); + } + + difference_type distance_to(const strided_iterator<Iterator> &other) const + { + return std::distance(this->base_reference(), other.base_reference()) / m_stride; + } + +private: + difference_type m_stride; +}; + +/// Returns a strided_iterator for \p iterator with \p stride. +/// +/// \param iterator the underlying iterator +/// \param stride the iteration step for strided_iterator +/// +/// \return a \c strided_iterator for \p iterator with \p stride. 
+/// +/// For example, to create an iterator which iterates over every other +/// element in a \c vector<int>: +/// \code +/// auto strided_iterator = make_strided_iterator(vec.begin(), 2); +/// \endcode +template<class Iterator> +inline strided_iterator<Iterator> +make_strided_iterator(Iterator iterator, + typename std::iterator_traits<Iterator>::difference_type stride) +{ + return strided_iterator<Iterator>(iterator, stride); +} + +/// Returns a strided_iterator which refers to element that would follow +/// the last element accessible through strided_iterator for \p first iterator +/// with \p stride. +/// +/// Parameter \p stride must be greater than zero. +/// +/// \param first the iterator referring to the first element accessible +/// through strided_iterator for \p first with \p stride +/// \param last the iterator referring to the last element that may be +//// accessible through strided_iterator for \p first with \p stride +/// \param stride the iteration step +/// +/// \return a \c strided_iterator referring to element that would follow +/// the last element accessible through strided_iterator for \p first +/// iterator with \p stride. 
+/// +/// It can be helpful when iterating over strided_iterator: +/// \code +/// // vec.size() may not be divisible by 3 +/// auto strided_iterator_begin = make_strided_iterator(vec.begin(), 3); +/// auto strided_iterator_end = make_strided_iterator_end(vec.begin(), vec.end(), 3); +/// +/// // copy every 3rd element to result +/// boost::compute::copy( +/// strided_iterator_begin, +/// strided_iterator_end, +/// result.begin(), +/// queue +/// ); +/// \endcode +template<class Iterator> +strided_iterator<Iterator> +make_strided_iterator_end(Iterator first, + Iterator last, + typename std::iterator_traits<Iterator>::difference_type stride) +{ + typedef typename std::iterator_traits<Iterator>::difference_type difference_type; + + // calculate distance from end to the last element that would be + // accessible through strided_iterator. + difference_type range = std::distance(first, last); + difference_type d = (range - 1) / stride; + d *= stride; + d -= range; + // advance from end to the element that would follow the last + // accessible element + Iterator end_for_strided_iterator = last; + std::advance(end_for_strided_iterator, d + stride); + return strided_iterator<Iterator>(end_for_strided_iterator, stride); +} + +/// \internal_ (is_device_iterator specialization for strided_iterator) +template<class Iterator> +struct is_device_iterator<strided_iterator<Iterator> > : boost::true_type {}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ITERATOR_STRIDED_ITERATOR_HPP diff --git a/boost/compute/iterator/transform_iterator.hpp b/boost/compute/iterator/transform_iterator.hpp new file mode 100644 index 0000000000..c040922f9d --- /dev/null +++ b/boost/compute/iterator/transform_iterator.hpp @@ -0,0 +1,227 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file 
LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ITERATOR_TRANSFORM_ITERATOR_HPP +#define BOOST_COMPUTE_ITERATOR_TRANSFORM_ITERATOR_HPP + +#include <cstddef> +#include <iterator> + +#include <boost/config.hpp> +#include <boost/iterator/iterator_adaptor.hpp> + +#include <boost/compute/functional.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/detail/is_buffer_iterator.hpp> +#include <boost/compute/detail/read_write_single_value.hpp> +#include <boost/compute/iterator/detail/get_base_iterator_buffer.hpp> +#include <boost/compute/type_traits/is_device_iterator.hpp> +#include <boost/compute/type_traits/result_of.hpp> + +namespace boost { +namespace compute { + +// forward declaration for transform_iterator +template<class InputIterator, class UnaryFunction> +class transform_iterator; + +namespace detail { + +// meta-function returning the value_type for a transform_iterator +template<class InputIterator, class UnaryFunction> +struct make_transform_iterator_value_type +{ + typedef typename std::iterator_traits<InputIterator>::value_type value_type; + + typedef typename boost::compute::result_of<UnaryFunction(value_type)>::type type; +}; + +// helper class which defines the iterator_adaptor super-class +// type for transform_iterator +template<class InputIterator, class UnaryFunction> +class transform_iterator_base +{ +public: + typedef ::boost::iterator_adaptor< + ::boost::compute::transform_iterator<InputIterator, UnaryFunction>, + InputIterator, + typename make_transform_iterator_value_type<InputIterator, UnaryFunction>::type, + typename std::iterator_traits<InputIterator>::iterator_category, + typename make_transform_iterator_value_type<InputIterator, UnaryFunction>::type + > type; +}; + +template<class InputIterator, class UnaryFunction, class 
IndexExpr> +struct transform_iterator_index_expr +{ + typedef typename + make_transform_iterator_value_type< + InputIterator, + UnaryFunction + >::type result_type; + + transform_iterator_index_expr(const InputIterator &input_iter, + const UnaryFunction &transform_expr, + const IndexExpr &index_expr) + : m_input_iter(input_iter), + m_transform_expr(transform_expr), + m_index_expr(index_expr) + { + } + + InputIterator m_input_iter; + UnaryFunction m_transform_expr; + IndexExpr m_index_expr; +}; + +template<class InputIterator, class UnaryFunction, class IndexExpr> +inline meta_kernel& operator<<(meta_kernel &kernel, + const transform_iterator_index_expr<InputIterator, + UnaryFunction, + IndexExpr> &expr) +{ + return kernel << expr.m_transform_expr(expr.m_input_iter[expr.m_index_expr]); +} + +} // end detail namespace + +/// \class transform_iterator +/// \brief A transform iterator adaptor. +/// +/// The transform_iterator adaptor applies a unary function to each element +/// produced from the underlying iterator when dereferenced. 
+/// +/// For example, to copy from an input range to an output range while taking +/// the absolute value of each element: +/// +/// \snippet test/test_transform_iterator.cpp copy_abs +/// +/// \see buffer_iterator, make_transform_iterator() +template<class InputIterator, class UnaryFunction> +class transform_iterator : + public detail::transform_iterator_base<InputIterator, UnaryFunction>::type +{ +public: + typedef typename + detail::transform_iterator_base<InputIterator, + UnaryFunction>::type super_type; + typedef typename super_type::value_type value_type; + typedef typename super_type::reference reference; + typedef typename super_type::base_type base_type; + typedef typename super_type::difference_type difference_type; + typedef UnaryFunction unary_function; + + transform_iterator(InputIterator iterator, UnaryFunction transform) + : super_type(iterator), + m_transform(transform) + { + } + + transform_iterator(const transform_iterator<InputIterator, + UnaryFunction> &other) + : super_type(other.base()), + m_transform(other.m_transform) + { + } + + transform_iterator<InputIterator, UnaryFunction>& + operator=(const transform_iterator<InputIterator, + UnaryFunction> &other) + { + if(this != &other){ + super_type::operator=(other); + + m_transform = other.m_transform; + } + + return *this; + } + + ~transform_iterator() + { + } + + size_t get_index() const + { + return super_type::base().get_index(); + } + + const buffer& get_buffer() const + { + return detail::get_base_iterator_buffer(*this); + } + + template<class IndexExpression> + detail::transform_iterator_index_expr<InputIterator, UnaryFunction, IndexExpression> + operator[](const IndexExpression &expr) const + { + return detail::transform_iterator_index_expr<InputIterator, + UnaryFunction, + IndexExpression>(super_type::base(), + m_transform, + expr); + } + +private: + friend class ::boost::iterator_core_access; + + reference dereference() const + { + const context &context = 
super_type::base().get_buffer().get_context(); + command_queue queue(context, context.get_device()); + + detail::meta_kernel k("read"); + size_t output_arg = k.add_arg<value_type *>(memory_object::global_memory, "output"); + k << "*output = " << m_transform(super_type::base()[k.lit(0)]) << ";"; + + kernel kernel = k.compile(context); + + buffer output_buffer(context, sizeof(value_type)); + + kernel.set_arg(output_arg, output_buffer); + + queue.enqueue_task(kernel); + + return detail::read_single_value<value_type>(output_buffer, queue); + } + +private: + UnaryFunction m_transform; +}; + +/// Returns a transform_iterator for \p iterator with \p transform. +/// +/// \param iterator the underlying iterator +/// \param transform the unary transform function +/// +/// \return a \c transform_iterator for \p iterator with \p transform +/// +/// For example, to create an iterator which returns the square-root of each +/// value in a \c vector<int>: +/// \code +/// auto sqrt_iterator = make_transform_iterator(vec.begin(), sqrt<int>()); +/// \endcode +template<class InputIterator, class UnaryFunction> +inline transform_iterator<InputIterator, UnaryFunction> +make_transform_iterator(InputIterator iterator, UnaryFunction transform) +{ + return transform_iterator<InputIterator, + UnaryFunction>(iterator, transform); +} + +/// \internal_ (is_device_iterator specialization for transform_iterator) +template<class InputIterator, class UnaryFunction> +struct is_device_iterator< + transform_iterator<InputIterator, UnaryFunction> > : boost::true_type {}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ITERATOR_TRANSFORM_ITERATOR_HPP diff --git a/boost/compute/iterator/zip_iterator.hpp b/boost/compute/iterator/zip_iterator.hpp new file mode 100644 index 0000000000..2860d73a93 --- /dev/null +++ b/boost/compute/iterator/zip_iterator.hpp @@ -0,0 +1,316 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 
2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_ITERATOR_ZIP_ITERATOR_HPP +#define BOOST_COMPUTE_ITERATOR_ZIP_ITERATOR_HPP + +#include <cstddef> +#include <iterator> + +#include <boost/config.hpp> +#include <boost/fusion/algorithm/iteration/for_each.hpp> +#include <boost/iterator/iterator_facade.hpp> +#include <boost/mpl/back_inserter.hpp> +#include <boost/mpl/transform.hpp> +#include <boost/mpl/vector.hpp> +#include <boost/preprocessor/repetition.hpp> +#include <boost/tuple/tuple.hpp> +#include <boost/tuple/tuple_comparison.hpp> + +#include <boost/compute/config.hpp> +#include <boost/compute/functional.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/detail/mpl_vector_to_tuple.hpp> +#include <boost/compute/types/tuple.hpp> +#include <boost/compute/type_traits/is_device_iterator.hpp> +#include <boost/compute/type_traits/type_name.hpp> + +namespace boost { +namespace compute { + +// forward declaration for zip_iterator +template<class IteratorTuple> +class zip_iterator; + +namespace detail { + +namespace mpl = boost::mpl; + +// meta-function returning the value_type for an iterator +template<class Iterator> +struct make_iterator_value_type +{ + typedef typename std::iterator_traits<Iterator>::value_type type; +}; + +// meta-function returning the value_type for a zip_iterator +template<class IteratorTuple> +struct make_zip_iterator_value_type +{ + typedef typename + detail::mpl_vector_to_tuple< + typename mpl::transform< + IteratorTuple, + make_iterator_value_type<mpl::_1>, + mpl::back_inserter<mpl::vector<> > + >::type + >::type type; +}; + +// helper class which defines the iterator_facade super-class +// type for 
zip_iterator +template<class IteratorTuple> +class zip_iterator_base +{ +public: + typedef ::boost::iterator_facade< + ::boost::compute::zip_iterator<IteratorTuple>, + typename make_zip_iterator_value_type<IteratorTuple>::type, + ::std::random_access_iterator_tag, + typename make_zip_iterator_value_type<IteratorTuple>::type + > type; +}; + +template<class IteratorTuple, class IndexExpr> +struct zip_iterator_index_expr +{ + typedef typename + make_zip_iterator_value_type<IteratorTuple>::type + result_type; + + zip_iterator_index_expr(const IteratorTuple &iterators, + const IndexExpr &index_expr) + : m_iterators(iterators), + m_index_expr(index_expr) + { + } + + IteratorTuple m_iterators; + IndexExpr m_index_expr; +}; + +/// \internal_ +#define BOOST_COMPUTE_PRINT_ELEM(z, n, unused) \ + BOOST_PP_EXPR_IF(n, << ", ") \ + << boost::get<n>(expr.m_iterators)[expr.m_index_expr] + +/// \internal_ +#define BOOST_COMPUTE_PRINT_ZIP_IDX(z, n, unused) \ +template<BOOST_PP_ENUM_PARAMS(n, class Iterator), class IndexExpr> \ +inline meta_kernel& operator<<( \ + meta_kernel &kernel, \ + const zip_iterator_index_expr< \ + boost::tuple<BOOST_PP_ENUM_PARAMS(n, Iterator)>, \ + IndexExpr \ + > &expr) \ +{ \ + typedef typename \ + boost::tuple<BOOST_PP_ENUM_PARAMS(n, Iterator)> \ + tuple_type; \ + typedef typename \ + make_zip_iterator_value_type<tuple_type>::type \ + value_type; \ + kernel.inject_type<value_type>(); \ + return kernel \ + << "(" << type_name<value_type>() << ")" \ + << "{ " \ + BOOST_PP_REPEAT(n, BOOST_COMPUTE_PRINT_ELEM, ~) \ + << "}"; \ +} + +BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_PRINT_ZIP_IDX, ~) + +#undef BOOST_COMPUTE_PRINT_ZIP_IDX +#undef BOOST_COMPUTE_PRINT_ELEM + +struct iterator_advancer +{ + iterator_advancer(size_t n) + : m_distance(n) + { + } + + template<class Iterator> + void operator()(Iterator &i) const + { + std::advance(i, m_distance); + } + + size_t m_distance; +}; + +template<class Iterator> +void increment_iterator(Iterator 
&i) +{ + i++; +} + +template<class Iterator> +void decrement_iterator(Iterator &i) +{ + i--; +} + +} // end detail namespace + +/// \class zip_iterator +/// \brief A zip iterator adaptor. +/// +/// The zip_iterator class combines values from multiple input iterators. When +/// dereferenced it returns a tuple containing each value at the current +/// position in each input range. +/// +/// \see make_zip_iterator() +template<class IteratorTuple> +class zip_iterator : public detail::zip_iterator_base<IteratorTuple>::type +{ +public: + typedef typename + detail::zip_iterator_base<IteratorTuple>::type + super_type; + typedef typename super_type::value_type value_type; + typedef typename super_type::reference reference; + typedef typename super_type::difference_type difference_type; + typedef IteratorTuple iterator_tuple; + + zip_iterator(IteratorTuple iterators) + : m_iterators(iterators) + { + } + + zip_iterator(const zip_iterator<IteratorTuple> &other) + : m_iterators(other.m_iterators) + { + } + + zip_iterator<IteratorTuple>& + operator=(const zip_iterator<IteratorTuple> &other) + { + if(this != &other){ + super_type::operator=(other); + + m_iterators = other.m_iterators; + } + + return *this; + } + + ~zip_iterator() + { + } + + const IteratorTuple& get_iterator_tuple() const + { + return m_iterators; + } + + template<class IndexExpression> + detail::zip_iterator_index_expr<IteratorTuple, IndexExpression> + operator[](const IndexExpression &expr) const + { + return detail::zip_iterator_index_expr<IteratorTuple, + IndexExpression>(m_iterators, + expr); + } + +private: + friend class ::boost::iterator_core_access; + + reference dereference() const + { + return reference(); + } + + bool equal(const zip_iterator<IteratorTuple> &other) const + { + return m_iterators == other.m_iterators; + } + + void increment() + { + boost::fusion::for_each(m_iterators, detail::increment_iterator); + } + + void decrement() + { + boost::fusion::for_each(m_iterators, 
detail::decrement_iterator); + } + + void advance(difference_type n) + { + boost::fusion::for_each(m_iterators, detail::iterator_advancer(n)); + } + + difference_type distance_to(const zip_iterator<IteratorTuple> &other) const + { + return std::distance(boost::get<0>(m_iterators), + boost::get<0>(other.m_iterators)); + } + +private: + IteratorTuple m_iterators; +}; + +/// Creates a zip_iterator for \p iterators. +/// +/// \param iterators a tuple of input iterators to zip together +/// +/// \return a \c zip_iterator for \p iterators +/// +/// For example, to zip together iterators from three vectors (\c a, \c b, and +/// \p c): +/// \code +/// auto zipped = boost::compute::make_zip_iterator( +/// boost::make_tuple(a.begin(), b.begin(), c.begin()) +/// ); +/// \endcode +template<class IteratorTuple> +inline zip_iterator<IteratorTuple> +make_zip_iterator(IteratorTuple iterators) +{ + return zip_iterator<IteratorTuple>(iterators); +} + +/// \internal_ (is_device_iterator specialization for zip_iterator) +template<class IteratorTuple> +struct is_device_iterator<zip_iterator<IteratorTuple> > : boost::true_type {}; + +namespace detail { + +// get<N>() specialization for zip_iterator +/// \internal_ +#define BOOST_COMPUTE_ZIP_GET_N(z, n, unused) \ +template<size_t N, class IteratorTuple, class IndexExpr, \ + BOOST_PP_ENUM_PARAMS(n, class T)> \ +inline meta_kernel& \ +operator<<(meta_kernel &kernel, \ + const invoked_get< \ + N, \ + zip_iterator_index_expr<IteratorTuple, IndexExpr>, \ + boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> \ + > &expr) \ +{ \ + typedef typename boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> Tuple; \ + typedef typename boost::tuples::element<N, Tuple>::type T; \ + BOOST_STATIC_ASSERT(N < size_t(boost::tuples::length<Tuple>::value)); \ + kernel.inject_type<T>(); \ + return kernel \ + << boost::get<N>(expr.m_arg.m_iterators)[expr.m_arg.m_index_expr]; \ +} + +BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_ZIP_GET_N, ~) + +#undef 
BOOST_COMPUTE_ZIP_GET_N + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_ITERATOR_ZIP_ITERATOR_HPP diff --git a/boost/compute/kernel.hpp b/boost/compute/kernel.hpp new file mode 100644 index 0000000000..9494e46de2 --- /dev/null +++ b/boost/compute/kernel.hpp @@ -0,0 +1,394 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_KERNEL_HPP +#define BOOST_COMPUTE_KERNEL_HPP + +#include <string> + +#include <boost/assert.hpp> +#include <boost/utility/enable_if.hpp> + +#include <boost/compute/config.hpp> +#include <boost/compute/program.hpp> +#include <boost/compute/exception.hpp> +#include <boost/compute/type_traits/is_fundamental.hpp> +#include <boost/compute/detail/get_object_info.hpp> +#include <boost/compute/detail/assert_cl_success.hpp> +#include <boost/compute/memory/svm_ptr.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class T> struct set_kernel_arg; + +} // end detail namespace + +/// \class kernel +/// \brief A compute kernel. +/// +/// \see command_queue, program +class kernel +{ +public: + /// Creates a null kernel object. + kernel() + : m_kernel(0) + { + } + + /// Creates a new kernel object for \p kernel. If \p retain is + /// \c true, the reference count for \p kernel will be incremented. + explicit kernel(cl_kernel kernel, bool retain = true) + : m_kernel(kernel) + { + if(m_kernel && retain){ + clRetainKernel(m_kernel); + } + } + + /// Creates a new kernel object with \p name from \p program. 
+ kernel(const program &program, const std::string &name) + { + cl_int error = 0; + m_kernel = clCreateKernel(program.get(), name.c_str(), &error); + + if(!m_kernel){ + BOOST_THROW_EXCEPTION(opencl_error(error)); + } + } + + /// Creates a new kernel object as a copy of \p other. + kernel(const kernel &other) + : m_kernel(other.m_kernel) + { + if(m_kernel){ + clRetainKernel(m_kernel); + } + } + + /// Copies the kernel object from \p other to \c *this. + kernel& operator=(const kernel &other) + { + if(this != &other){ + if(m_kernel){ + clReleaseKernel(m_kernel); + } + + m_kernel = other.m_kernel; + + if(m_kernel){ + clRetainKernel(m_kernel); + } + } + + return *this; + } + + #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES + /// Move-constructs a new kernel object from \p other. + kernel(kernel&& other) BOOST_NOEXCEPT + : m_kernel(other.m_kernel) + { + other.m_kernel = 0; + } + + /// Move-assigns the kernel from \p other to \c *this. + kernel& operator=(kernel&& other) BOOST_NOEXCEPT + { + if(m_kernel){ + clReleaseKernel(m_kernel); + } + + m_kernel = other.m_kernel; + other.m_kernel = 0; + + return *this; + } + #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES + + /// Destroys the kernel object. + ~kernel() + { + if(m_kernel){ + BOOST_COMPUTE_ASSERT_CL_SUCCESS( + clReleaseKernel(m_kernel) + ); + } + } + + /// Returns a reference to the underlying OpenCL kernel object. + cl_kernel& get() const + { + return const_cast<cl_kernel &>(m_kernel); + } + + /// Returns the function name for the kernel. + std::string name() const + { + return get_info<std::string>(CL_KERNEL_FUNCTION_NAME); + } + + /// Returns the number of arguments for the kernel. + size_t arity() const + { + return get_info<cl_uint>(CL_KERNEL_NUM_ARGS); + } + + /// Returns the program for the kernel. + program get_program() const + { + return program(get_info<cl_program>(CL_KERNEL_PROGRAM)); + } + + /// Returns the context for the kernel. 
+ context get_context() const + { + return context(get_info<cl_context>(CL_KERNEL_CONTEXT)); + } + + /// Returns information about the kernel. + /// + /// \see_opencl_ref{clGetKernelInfo} + template<class T> + T get_info(cl_kernel_info info) const + { + return detail::get_object_info<T>(clGetKernelInfo, m_kernel, info); + } + + /// \overload + template<int Enum> + typename detail::get_object_info_type<kernel, Enum>::type + get_info() const; + + #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + /// Returns information about the argument at \p index. + /// + /// For example, to get the name of the first argument: + /// \code + /// std::string arg = kernel.get_arg_info<std::string>(0, CL_KERNEL_ARG_NAME); + /// \endcode + /// + /// Note, this function requires that the program be compiled with the + /// \c "-cl-kernel-arg-info" flag. For example: + /// \code + /// program.build("-cl-kernel-arg-info"); + /// \endcode + /// + /// \opencl_version_warning{1,2} + /// + /// \see_opencl_ref{clGetKernelArgInfo} + template<class T> + T get_arg_info(size_t index, cl_kernel_arg_info info) const + { + return detail::get_object_info<T>(clGetKernelArgInfo, m_kernel, info, index); + } + #endif // CL_VERSION_1_2 + + /// Returns work-group information for the kernel with \p device. + /// + /// \see_opencl_ref{clGetKernelWorkGroupInfo} + template<class T> + T get_work_group_info(const device &device, cl_kernel_work_group_info info) const + { + return detail::get_object_info<T>(clGetKernelWorkGroupInfo, m_kernel, info, device.id()); + } + + /// Sets the argument at \p index to \p value with \p size. + /// + /// \see_opencl_ref{clSetKernelArg} + void set_arg(size_t index, size_t size, const void *value) + { + BOOST_ASSERT(index < arity()); + + cl_int ret = clSetKernelArg(m_kernel, + static_cast<cl_uint>(index), + size, + value); + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + } + + /// Sets the argument at \p index to \p value. 
+ /// + /// For built-in types (e.g. \c float, \c int4_), this is equivalent to + /// calling set_arg(index, sizeof(type), &value). + /// + /// Additionally, this method is specialized for device memory objects + /// such as buffer and image2d. This allows for them to be passed directly + /// without having to extract their underlying cl_mem object. + /// + /// This method is also specialized for device container types such as + /// vector<T> and array<T, N>. This allows for them to be passed directly + /// as kernel arguments without having to extract their underlying buffer. + /// + /// For setting local memory arguments (e.g. "__local float *buf"), the + /// local_buffer<T> class may be used: + /// \code + /// // set argument to a local buffer with storage for 32 float's + /// kernel.set_arg(0, local_buffer<float>(32)); + /// \endcode + template<class T> + void set_arg(size_t index, const T &value) + { + // if you get a compilation error pointing here it means you + // attempted to set a kernel argument from an invalid type. + detail::set_kernel_arg<T>()(*this, index, value); + } + + /// \internal_ + void set_arg(size_t index, const cl_mem mem) + { + set_arg(index, sizeof(cl_mem), static_cast<const void *>(&mem)); + } + + /// \internal_ + void set_arg(size_t index, const cl_sampler sampler) + { + set_arg(index, sizeof(cl_sampler), static_cast<const void *>(&sampler)); + } + + /// \internal_ + template<class T> + void set_arg(size_t index, const svm_ptr<T> ptr) + { + #ifdef CL_VERSION_2_0 + cl_int ret = clSetKernelArgSVMPointer(m_kernel, index, ptr.get()); + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + #else + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_ARG_VALUE)); + #endif + } + + #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES + /// Sets the arguments for the kernel to \p args. + template<class... T> + void set_args(T&&... 
args) + { + BOOST_ASSERT(sizeof...(T) <= arity()); + + _set_args<0>(args...); + } + #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES + + #if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + /// Sets additional execution information for the kernel. + /// + /// \opencl_version_warning{2,0} + /// + /// \see_opencl2_ref{clSetKernelExecInfo} + void set_exec_info(cl_kernel_exec_info info, size_t size, const void *value) + { + cl_int ret = clSetKernelExecInfo(m_kernel, info, size, value); + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + } + #endif // CL_VERSION_2_0 + + /// Returns \c true if the kernel is the same as \p other. + bool operator==(const kernel &other) const + { + return m_kernel == other.m_kernel; + } + + /// Returns \c true if the kernel is different from \p other. + bool operator!=(const kernel &other) const + { + return m_kernel != other.m_kernel; + } + + /// \internal_ + operator cl_kernel() const + { + return m_kernel; + } + + /// \internal_ + static kernel create_with_source(const std::string &source, + const std::string &name, + const context &context) + { + return program::build_with_source(source, context).create_kernel(name); + } + +private: + // guarded by the same macro as set_args(), which is the only caller + #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES + /// \internal_ + /// Terminates the _set_args() recursion once no arguments remain. + template<size_t N> + void _set_args() + { + } + + /// \internal_ + /// Sets argument \c N to \p arg and then sets the remaining arguments + /// in \p rest starting at index \c N+1. + template<size_t N, class T, class... Args> + void _set_args(T&& arg, Args&&... 
rest) + { + set_arg(N, arg); + _set_args<N+1>(rest...); + } + #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES + +private: + cl_kernel m_kernel; +}; + +inline kernel program::create_kernel(const std::string &name) const +{ + return kernel(*this, name); +} + +/// \internal_ define get_info() specializations for kernel +BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel, + ((std::string, CL_KERNEL_FUNCTION_NAME)) + ((cl_uint, CL_KERNEL_NUM_ARGS)) + ((cl_uint, CL_KERNEL_REFERENCE_COUNT)) + ((cl_context, CL_KERNEL_CONTEXT)) + ((cl_program, CL_KERNEL_PROGRAM)) +) + +#ifdef CL_VERSION_1_2 +BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel, + ((std::string, CL_KERNEL_ATTRIBUTES)) +) +#endif // CL_VERSION_1_2 + +namespace detail { + +// set_kernel_arg implementation for built-in types +template<class T> +struct set_kernel_arg +{ + typename boost::enable_if<is_fundamental<T> >::type + operator()(kernel &kernel_, size_t index, const T &value) + { + kernel_.set_arg(index, sizeof(T), &value); + } +}; + +// set_kernel_arg specialization for char (different from built-in cl_char) +template<> +struct set_kernel_arg<char> +{ + void operator()(kernel &kernel_, size_t index, const char c) + { + kernel_.set_arg(index, sizeof(char), &c); + } +}; + +} // end detail namespace +} // end namespace compute +} // end namespace boost + +#endif // BOOST_COMPUTE_KERNEL_HPP diff --git a/boost/compute/lambda.hpp b/boost/compute/lambda.hpp new file mode 100644 index 0000000000..24bedfd721 --- /dev/null +++ b/boost/compute/lambda.hpp @@ -0,0 +1,22 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_LAMBDA_HPP +#define BOOST_COMPUTE_LAMBDA_HPP + +#include <boost/compute/lambda/context.hpp> +#include <boost/compute/lambda/functional.hpp> +#include <boost/compute/lambda/get.hpp> +#include <boost/compute/lambda/make_pair.hpp> +#include <boost/compute/lambda/make_tuple.hpp> +#include <boost/compute/lambda/placeholders.hpp> +#include <boost/compute/lambda/result_of.hpp> + +#endif // BOOST_COMPUTE_LAMBDA_HPP diff --git a/boost/compute/lambda/context.hpp b/boost/compute/lambda/context.hpp new file mode 100644 index 0000000000..ed25b79475 --- /dev/null +++ b/boost/compute/lambda/context.hpp @@ -0,0 +1,329 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_LAMBDA_CONTEXT_HPP +#define BOOST_COMPUTE_LAMBDA_CONTEXT_HPP + +#include <boost/proto/core.hpp> +#include <boost/proto/context.hpp> +#include <boost/type_traits.hpp> +#include <boost/preprocessor/repetition.hpp> + +#include <boost/compute/config.hpp> +#include <boost/compute/function.hpp> +#include <boost/compute/lambda/result_of.hpp> +#include <boost/compute/lambda/functional.hpp> +#include <boost/compute/type_traits/result_of.hpp> +#include <boost/compute/type_traits/type_name.hpp> +#include <boost/compute/detail/meta_kernel.hpp> + +namespace boost { +namespace compute { +namespace lambda { + +namespace mpl = boost::mpl; +namespace proto = boost::proto; + +#define BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(tag, op) \ + template<class LHS, class RHS> \ + void operator()(tag, const LHS &lhs, const RHS &rhs) \ + { \ + if(proto::arity_of<LHS>::value > 0){ \ + stream << '('; \ + proto::eval(lhs, *this); \ + stream << ')'; \ + } \ + else { \ + proto::eval(lhs, *this); \ + } \ + \ + stream << op; \ + \ + if(proto::arity_of<RHS>::value > 0){ \ + stream << '('; \ + proto::eval(rhs, *this); \ + stream << ')'; \ + } \ + else { \ + proto::eval(rhs, *this); \ + } \ + } + +// lambda expression context +template<class Args> +struct context : proto::callable_context<context<Args> > +{ + typedef void result_type; + typedef Args args_tuple; + + // create a lambda context for kernel with args + context(boost::compute::detail::meta_kernel &kernel, const Args &args_) + : stream(kernel), + args(args_) + { + } + + // handle terminals + template<class T> + void operator()(proto::tag::terminal, const T &x) + { + // terminal values in lambda expressions are always literals + stream << stream.lit(x); + } + + // handle placeholders + template<int I> + void operator()(proto::tag::terminal, placeholder<I>) + { + stream << boost::get<I>(args); + } + + // handle functions + #define 
BOOST_COMPUTE_LAMBDA_CONTEXT_FUNCTION_ARG(z, n, unused) \ + BOOST_PP_COMMA_IF(n) BOOST_PP_CAT(const Arg, n) BOOST_PP_CAT(&arg, n) + + #define BOOST_COMPUTE_LAMBDA_CONTEXT_FUNCTION(z, n, unused) \ + template<class F, BOOST_PP_ENUM_PARAMS(n, class Arg)> \ + void operator()( \ + proto::tag::function, \ + const F &function, \ + BOOST_PP_REPEAT(n, BOOST_COMPUTE_LAMBDA_CONTEXT_FUNCTION_ARG, ~) \ + ) \ + { \ + proto::value(function).apply(*this, BOOST_PP_ENUM_PARAMS(n, arg)); \ + } + + BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_LAMBDA_CONTEXT_FUNCTION, ~) + + #undef BOOST_COMPUTE_LAMBDA_CONTEXT_FUNCTION + + // operators + BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::plus, '+') + BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::minus, '-') + BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::multiplies, '*') + BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::divides, '/') + BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::modulus, '%') + BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::less, '<') + BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::greater, '>') + BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::less_equal, "<=") + BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::greater_equal, ">=") + BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::equal_to, "==") + BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::not_equal_to, "!=") + BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::logical_and, "&&") + BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::logical_or, "||") + BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::bitwise_and, '&') + BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::bitwise_or, '|') + BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::bitwise_xor, '^') + 
BOOST_COMPUTE_LAMBDA_CONTEXT_DEFINE_BINARY_OPERATOR(proto::tag::assign, '=') + + // subscript operator + template<class LHS, class RHS> + void operator()(proto::tag::subscript, const LHS &lhs, const RHS &rhs) + { + proto::eval(lhs, *this); + stream << '['; + proto::eval(rhs, *this); + stream << ']'; + } + + // ternary conditional operator + template<class Pred, class Arg1, class Arg2> + void operator()(proto::tag::if_else_, const Pred &p, const Arg1 &x, const Arg2 &y) + { + proto::eval(p, *this); + stream << '?'; + proto::eval(x, *this); + stream << ':'; + proto::eval(y, *this); + } + + boost::compute::detail::meta_kernel &stream; + Args args; +}; + +namespace detail { + +template<class Expr, class Arg> +struct invoked_unary_expression +{ + typedef typename ::boost::compute::result_of<Expr(Arg)>::type result_type; + + invoked_unary_expression(const Expr &expr, const Arg &arg) + : m_expr(expr), + m_arg(arg) + { + } + + Expr m_expr; + Arg m_arg; +}; + +template<class Expr, class Arg> +boost::compute::detail::meta_kernel& +operator<<(boost::compute::detail::meta_kernel &kernel, + const invoked_unary_expression<Expr, Arg> &expr) +{ + context<boost::tuple<Arg> > ctx(kernel, boost::make_tuple(expr.m_arg)); + proto::eval(expr.m_expr, ctx); + + return kernel; +} + +template<class Expr, class Arg1, class Arg2> +struct invoked_binary_expression +{ + typedef typename ::boost::compute::result_of<Expr(Arg1, Arg2)>::type result_type; + + invoked_binary_expression(const Expr &expr, + const Arg1 &arg1, + const Arg2 &arg2) + : m_expr(expr), + m_arg1(arg1), + m_arg2(arg2) + { + } + + Expr m_expr; + Arg1 m_arg1; + Arg2 m_arg2; +}; + +template<class Expr, class Arg1, class Arg2> +boost::compute::detail::meta_kernel& +operator<<(boost::compute::detail::meta_kernel &kernel, + const invoked_binary_expression<Expr, Arg1, Arg2> &expr) +{ + context<boost::tuple<Arg1, Arg2> > ctx( + kernel, + boost::make_tuple(expr.m_arg1, expr.m_arg2) + ); + proto::eval(expr.m_expr, ctx); + + return kernel; 
+} + +} // end detail namespace + +// forward declare domain +struct domain; + +// lambda expression wrapper +template<class Expr> +struct expression : proto::extends<Expr, expression<Expr>, domain> +{ + typedef proto::extends<Expr, expression<Expr>, domain> base_type; + + BOOST_PROTO_EXTENDS_USING_ASSIGN(expression) + + expression(const Expr &expr = Expr()) + : base_type(expr) + { + } + + // result_of protocol + template<class Signature> + struct result + { + }; + + template<class This> + struct result<This()> + { + typedef + typename ::boost::compute::lambda::result_of<Expr>::type type; + }; + + template<class This, class Arg> + struct result<This(Arg)> + { + typedef + typename ::boost::compute::lambda::result_of< + Expr, + typename boost::tuple<Arg> + >::type type; + }; + + template<class This, class Arg1, class Arg2> + struct result<This(Arg1, Arg2)> + { + typedef typename + ::boost::compute::lambda::result_of< + Expr, + typename boost::tuple<Arg1, Arg2> + >::type type; + }; + + template<class Arg> + detail::invoked_unary_expression<expression<Expr>, Arg> + operator()(const Arg &x) const + { + return detail::invoked_unary_expression<expression<Expr>, Arg>(*this, x); + } + + template<class Arg1, class Arg2> + detail::invoked_binary_expression<expression<Expr>, Arg1, Arg2> + operator()(const Arg1 &x, const Arg2 &y) const + { + return detail::invoked_binary_expression< + expression<Expr>, + Arg1, + Arg2 + >(*this, x, y); + } + + // function<> conversion operator + template<class R, class A1> + operator function<R(A1)>() const + { + using ::boost::compute::detail::meta_kernel; + + std::stringstream source; + + ::boost::compute::detail::meta_kernel_variable<A1> arg1("x"); + + source << "inline " << type_name<R>() << " lambda" + << ::boost::compute::detail::generate_argument_list<R(A1)>('x') + << "{\n" + << " return " << meta_kernel::expr_to_string((*this)(arg1)) << ";\n" + << "}\n"; + + return make_function_from_source<R(A1)>("lambda", source.str()); + } + + 
// function<> conversion operator (two-argument form) + // + // Builds the source of an inline function named "lambda" that returns + // this expression evaluated on the variables x and y, then wraps that + // source with make_function_from_source(). + template<class R, class A1, class A2> + operator function<R(A1, A2)>() const + { + using ::boost::compute::detail::meta_kernel; + + std::stringstream source; + + // each variable takes the type of its own parameter in R(A1, A2) + ::boost::compute::detail::meta_kernel_variable<A1> arg1("x"); + ::boost::compute::detail::meta_kernel_variable<A2> arg2("y"); + + source << "inline " << type_name<R>() << " lambda" + << ::boost::compute::detail::generate_argument_list<R(A1, A2)>('x') + << "{\n" + << " return " << meta_kernel::expr_to_string((*this)(arg1, arg2)) << ";\n" + << "}\n"; + + return make_function_from_source<R(A1, A2)>("lambda", source.str()); + } +}; + +// lambda expression domain +struct domain : proto::domain<proto::generator<expression> > +{ +}; + +} // end lambda namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_LAMBDA_CONTEXT_HPP diff --git a/boost/compute/lambda/functional.hpp b/boost/compute/lambda/functional.hpp new file mode 100644 index 0000000000..dd7190e4d9 --- /dev/null +++ b/boost/compute/lambda/functional.hpp @@ -0,0 +1,242 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_LAMBDA_FUNCTIONAL_HPP +#define BOOST_COMPUTE_LAMBDA_FUNCTIONAL_HPP + +#include <boost/tuple/tuple.hpp> +#include <boost/lexical_cast.hpp> + +#include <boost/proto/core.hpp> +#include <boost/preprocessor/cat.hpp> +#include <boost/preprocessor/stringize.hpp> + +#include <boost/compute/functional/get.hpp> +#include <boost/compute/lambda/result_of.hpp> +#include <boost/compute/lambda/placeholder.hpp> + +namespace boost { +namespace compute { +namespace lambda { + +namespace mpl = boost::mpl; +namespace proto = boost::proto; + +// wraps a unary boolean function +#define BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(name) \ + namespace detail { \ + struct BOOST_PP_CAT(name, _func) \ + { \ + template<class Expr, class Args> \ + struct lambda_result \ + { \ + typedef int type; \ + }; \ + \ + template<class Context, class Arg> \ + static void apply(Context &ctx, const Arg &arg) \ + { \ + ctx.stream << #name << "("; \ + proto::eval(arg, ctx); \ + ctx.stream << ")"; \ + } \ + }; \ + } \ + template<class Arg> \ + inline typename proto::result_of::make_expr< \ + proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg& \ + >::type const \ + name(const Arg &arg) \ + { \ + return proto::make_expr<proto::tag::function>( \ + BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg) \ + ); \ + } + +// wraps a unary function whose return type is the same as the argument type +#define BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(name) \ + namespace detail { \ + struct BOOST_PP_CAT(name, _func) \ + { \ + template<class Expr, class Args> \ + struct lambda_result \ + { \ + typedef typename proto::result_of::child_c<Expr, 1>::type Arg1; \ + typedef typename ::boost::compute::lambda::result_of<Arg1, Args>::type type; \ + }; \ + \ + template<class Context, class Arg> \ + static void apply(Context &ctx, const Arg &arg) \ + { \ + ctx.stream << #name << "("; \ + proto::eval(arg, 
ctx); \ + ctx.stream << ")"; \ + } \ + }; \ + } \ + template<class Arg> \ + inline typename proto::result_of::make_expr< \ + proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg& \ + >::type const \ + name(const Arg &arg) \ + { \ + return proto::make_expr<proto::tag::function>( \ + BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg) \ + ); \ + } + +// wraps a binary function +#define BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(name) \ + namespace detail { \ + struct BOOST_PP_CAT(name, _func) \ + { \ + template<class Expr, class Args> \ + struct lambda_result \ + { \ + typedef typename proto::result_of::child_c<Expr, 1>::type Arg1; \ + typedef typename ::boost::compute::lambda::result_of<Arg1, Args>::type type; \ + }; \ + \ + template<class Context, class Arg1, class Arg2> \ + static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2) \ + { \ + ctx.stream << #name << "("; \ + proto::eval(arg1, ctx); \ + ctx.stream << ", "; \ + proto::eval(arg2, ctx); \ + ctx.stream << ")"; \ + } \ + }; \ + } \ + template<class Arg1, class Arg2> \ + inline typename proto::result_of::make_expr< \ + proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg1&, const Arg2& \ + >::type const \ + name(const Arg1 &arg1, const Arg2 &arg2) \ + { \ + return proto::make_expr<proto::tag::function>( \ + BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg1), ::boost::ref(arg2) \ + ); \ + } + +// wraps a binary function whose result type is the scalar type of the first argument +#define BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_ST(name) \ + namespace detail { \ + struct BOOST_PP_CAT(name, _func) \ + { \ + template<class Expr, class Args> \ + struct lambda_result \ + { \ + typedef typename proto::result_of::child_c<Expr, 1>::type Arg1; \ + typedef typename ::boost::compute::lambda::result_of<Arg1, Args>::type result_type; \ + typedef typename ::boost::compute::scalar_type<result_type>::type type; \ + }; \ + \ + template<class Context, class Arg1, class Arg2> \ + 
static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2) \ + { \ + ctx.stream << #name << "("; \ + proto::eval(arg1, ctx); \ + ctx.stream << ", "; \ + proto::eval(arg2, ctx); \ + ctx.stream << ")"; \ + } \ + }; \ + } \ + template<class Arg1, class Arg2> \ + inline typename proto::result_of::make_expr< \ + proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg1&, const Arg2& \ + >::type const \ + name(const Arg1 &arg1, const Arg2 &arg2) \ + { \ + return proto::make_expr<proto::tag::function>( \ + BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg1), ::boost::ref(arg2) \ + ); \ + } + +// wraps a ternary function +#define BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(name) \ + namespace detail { \ + struct BOOST_PP_CAT(name, _func) \ + { \ + template<class Expr, class Args> \ + struct lambda_result \ + { \ + typedef typename proto::result_of::child_c<Expr, 1>::type Arg1; \ + typedef typename ::boost::compute::lambda::result_of<Arg1, Args>::type type; \ + }; \ + \ + template<class Context, class Arg1, class Arg2, class Arg3> \ + static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) \ + { \ + ctx.stream << #name << "("; \ + proto::eval(arg1, ctx); \ + ctx.stream << ", "; \ + proto::eval(arg2, ctx); \ + ctx.stream << ", "; \ + proto::eval(arg3, ctx); \ + ctx.stream << ")"; \ + } \ + }; \ + } \ + template<class Arg1, class Arg2, class Arg3> \ + inline typename proto::result_of::make_expr< \ + proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg1&, const Arg2&, const Arg3& \ + >::type const \ + name(const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) \ + { \ + return proto::make_expr<proto::tag::function>( \ + BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg1), ::boost::ref(arg2), ::boost::ref(arg3) \ + ); \ + } + + +BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(all) +BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(any) +BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isinf) 
+BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isnan) +BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isfinite) + +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(abs) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(cos) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(acos) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sin) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(asin) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(tan) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(atan) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sqrt) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(rsqrt) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(exp) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(exp2) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(exp10) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(log) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(log2) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(log10) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(round) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(length) + +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(cross) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(pow) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(pown) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(powr) + +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_ST(dot) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_ST(distance) + +BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(clamp) +BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(fma) +BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(mad) +BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(smoothstep) + +} // end lambda namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_LAMBDA_FUNCTIONAL_HPP diff --git a/boost/compute/lambda/get.hpp b/boost/compute/lambda/get.hpp new file mode 100644 index 0000000000..547c208e95 --- /dev/null +++ b/boost/compute/lambda/get.hpp @@ -0,0 +1,148 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software 
License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_LAMBDA_GET_HPP +#define BOOST_COMPUTE_LAMBDA_GET_HPP + +#include <boost/preprocessor/repetition.hpp> + +#include <boost/compute/config.hpp> +#include <boost/compute/functional/get.hpp> +#include <boost/compute/lambda/placeholder.hpp> + +namespace boost { +namespace compute { +namespace lambda { +namespace detail { + +// function wrapper for get<N>() in lambda expressions +template<size_t N> +struct get_func +{ + // result type of get<N>() applied to the first function argument + template<class Expr, class Args> + struct lambda_result + { + typedef typename proto::result_of::child_c<Expr, 1>::type Arg; + typedef typename ::boost::compute::lambda::result_of<Arg, Args>::type T; + typedef typename ::boost::compute::detail::get_result_type<N, T>::type type; + }; + + // cv-stripped lambda result type of Arg for the context's argument tuple + template<class Context, class Arg> + struct make_get_result_type + { + typedef typename boost::remove_cv< + typename boost::compute::lambda::result_of< + Arg, typename Context::args_tuple + >::type + >::type type; + }; + + // returns the suffix string for get<N>() in lambda expressions + // (e.g. 
".s0" for get<0>() with float4) + template<class T> + struct make_get_suffix + { + static std::string value() + { + BOOST_STATIC_ASSERT(N < 16); + + std::stringstream stream; + + // components 0-9 map to ".s0"-".s9", 10-15 map to ".sa"-".sf" + if(N < 10){ + stream << ".s" << uint_(N); + } + else if(N < 16){ + stream << ".s" << char('a' + (N - 10)); + } + + return stream.str(); + } + }; + + // get<N>() specialization for std::pair<T1, T2> + template<class T1, class T2> + struct make_get_suffix<std::pair<T1, T2> > + { + static std::string value() + { + BOOST_STATIC_ASSERT(N < 2); + + if(N == 0){ + return ".first"; + } + else { + return ".second"; + } + } + }; + + // get<N>() specialization for boost::tuple<T...> + #define BOOST_COMPUTE_LAMBDA_GET_MAKE_TUPLE_SUFFIX(z, n, unused) \ + template<BOOST_PP_ENUM_PARAMS(n, class T)> \ + struct make_get_suffix<boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> > \ + { \ + static std::string value() \ + { \ + BOOST_STATIC_ASSERT(N < n); \ + return ".v" + boost::lexical_cast<std::string>(N); \ + } \ + }; + + BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_LAMBDA_GET_MAKE_TUPLE_SUFFIX, ~) + + #undef BOOST_COMPUTE_LAMBDA_GET_MAKE_TUPLE_SUFFIX + + // non-placeholder terminal: evaluate it, then append the member suffix + template<class Context, class Arg> + static void dispatch_apply_terminal(Context &ctx, const Arg &arg) + { + typedef typename make_get_result_type<Context, Arg>::type T; + + proto::eval(arg, ctx); + ctx.stream << make_get_suffix<T>::value(); + } + + // placeholder terminal: apply compute::get<N>() to the bound argument + template<class Context, int I> + static void dispatch_apply_terminal(Context &ctx, placeholder<I>) + { + ctx.stream << ::boost::compute::get<N>()(::boost::get<I>(ctx.args)); + } + + // only terminal expressions are handled; unwrap the terminal's value + template<class Context, class Arg> + static void dispatch_apply(Context &ctx, const Arg &arg, proto::tag::terminal) + { + 
dispatch_apply_terminal(ctx, proto::value(arg)); + } + + // entry point used by the lambda context; dispatches on the proto tag + template<class Context, class Arg> + static void apply(Context &ctx, const Arg &arg) + { + dispatch_apply(ctx, arg, typename proto::tag_of<Arg>::type()); + } +}; + +} // end detail namespace + +// get<N>() +template<size_t N, class Arg> +inline 
typename proto::result_of::make_expr< + proto::tag::function, detail::get_func<N>, const Arg& +>::type const +get(const Arg &arg) +{ + return proto::make_expr<proto::tag::function>( + detail::get_func<N>(), ::boost::ref(arg) + ); +} + +} // end lambda namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_LAMBDA_GET_HPP diff --git a/boost/compute/lambda/make_pair.hpp b/boost/compute/lambda/make_pair.hpp new file mode 100644 index 0000000000..3adca97c71 --- /dev/null +++ b/boost/compute/lambda/make_pair.hpp @@ -0,0 +1,70 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_LAMBDA_MAKE_PAIR_HPP +#define BOOST_COMPUTE_LAMBDA_MAKE_PAIR_HPP + +#include <boost/compute/types/pair.hpp> + +namespace boost { +namespace compute { +namespace lambda { +namespace detail { + +// function wrapper for make_pair() in lambda expressions +struct make_pair_func +{ + template<class Expr, class Args> + struct lambda_result + { + typedef typename proto::result_of::child_c<Expr, 1>::type Arg1; + typedef typename proto::result_of::child_c<Expr, 2>::type Arg2; + + typedef typename lambda::result_of<Arg1, Args>::type T1; + typedef typename lambda::result_of<Arg2, Args>::type T2; + + typedef std::pair<T1, T2> type; + }; + + template<class Context, class Arg1, class Arg2> + static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2) + { + typedef typename lambda::result_of<Arg1, typename Context::args_tuple>::type T1; + typedef typename lambda::result_of<Arg2, typename Context::args_tuple>::type T2; + + ctx.stream << 
"boost_make_pair("; + ctx.stream << type_name<T1>() << ", "; + proto::eval(arg1, ctx); + ctx.stream << ", "; + ctx.stream << type_name<T2>() << ", "; + proto::eval(arg2, ctx); + ctx.stream << ")"; + } +}; + +} // end detail namespace + +// make_pair(first, second) +template<class Arg1, class Arg2> +inline typename proto::result_of::make_expr< + proto::tag::function, detail::make_pair_func, const Arg1&, const Arg2& +>::type const +make_pair(const Arg1 &first, const Arg2 &second) +{ + return proto::make_expr<proto::tag::function>( + detail::make_pair_func(), ::boost::ref(first), ::boost::ref(second) + ); +} + +} // end lambda namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_LAMBDA_MAKE_PAIR_HPP diff --git a/boost/compute/lambda/make_tuple.hpp b/boost/compute/lambda/make_tuple.hpp new file mode 100644 index 0000000000..2d065a999a --- /dev/null +++ b/boost/compute/lambda/make_tuple.hpp @@ -0,0 +1,127 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_HPP
+#define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_HPP
+
+#include <boost/preprocessor/repetition.hpp>
+
+#include <boost/compute/config.hpp>
+#include <boost/compute/types/tuple.hpp>
+
+namespace boost {
+namespace compute {
+namespace lambda {
+namespace detail {
+
+// function wrapper for make_tuple() in lambda expressions
+//
+// lambda_result names the boost::tuple type a make_tuple() call expression
+// evaluates to, and apply() writes the corresponding source text to
+// ctx.stream. Both are generated once per arity with Boost.Preprocessor.
+struct make_tuple_func
+{
+    // primary template; only the per-arity specializations below are defined
+    template<class Expr, class Args, int N>
+    struct make_tuple_result_type;
+
+    // Arg<n> is child n+1 of the call expression: child 0 is skipped, and
+    // lambda_result below passes (arity - 1) as the number of tuple elements
+    #define BOOST_COMPUTE_MAKE_TUPLE_RESULT_GET_ARG(z, n, unused) \
+        typedef typename proto::result_of::child_c<Expr, BOOST_PP_INC(n)>::type BOOST_PP_CAT(Arg, n);
+
+    // T<n> is the lambda result type of Arg<n>
+    #define BOOST_COMPUTE_MAKE_TUPLE_RESULT_GET_ARG_TYPE(z, n, unused) \
+        typedef typename lambda::result_of<BOOST_PP_CAT(Arg, n), Args>::type BOOST_PP_CAT(T, n);
+
+    // specialization for n elements: type is boost::tuple<T0, ..., T(n-1)>
+    #define BOOST_COMPUTE_MAKE_TUPLE_RESULT_TYPE(z, n, unused) \
+        template<class Expr, class Args> \
+        struct make_tuple_result_type<Expr, Args, n> \
+        { \
+            BOOST_PP_REPEAT(n, BOOST_COMPUTE_MAKE_TUPLE_RESULT_GET_ARG, ~) \
+            BOOST_PP_REPEAT(n, BOOST_COMPUTE_MAKE_TUPLE_RESULT_GET_ARG_TYPE, ~) \
+            typedef boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> type; \
+        };
+
+    // expands the macro for n = 1 .. BOOST_COMPUTE_MAX_ARITY - 1
+    BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_MAKE_TUPLE_RESULT_TYPE, ~)
+
+    #undef BOOST_COMPUTE_MAKE_TUPLE_RESULT_GET_ARG
+    #undef BOOST_COMPUTE_MAKE_TUPLE_RESULT_GET_ARG_TYPE
+    #undef BOOST_COMPUTE_MAKE_TUPLE_RESULT_TYPE
+
+    // result type of a make_tuple() call expression
+    template<class Expr, class Args>
+    struct lambda_result
+    {
+        typedef typename make_tuple_result_type<
+            Expr, Args, proto::arity_of<Expr>::value - 1
+        >::type type;
+    };
+
+    // T<n> is the lambda result type of the n-th apply() argument
+    #define BOOST_COMPUTE_MAKE_TUPLE_GET_ARG_TYPE(z, n, unused) \
+        typedef typename lambda::result_of< \
+            BOOST_PP_CAT(Arg, n), typename Context::args_tuple \
+        >::type BOOST_PP_CAT(T, n);
+
+    // one "const Arg<n> &arg<n>" function parameter, comma-separated
+    #define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_APPLY_ARG(z, n, unused) \
+        BOOST_PP_COMMA_IF(n) BOOST_PP_CAT(const Arg, n) BOOST_PP_CAT(&arg, n)
+
+    // evaluates arg<n> into the context, preceded by ", " for every n but 0
+    #define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_APPLY_EVAL_ARG(z, n, unused) \
+        BOOST_PP_EXPR_IF(n, ctx.stream << ", ";) proto::eval(BOOST_PP_CAT(arg, n), ctx);
+
+    // apply() overload for n arguments: injects the tuple type into the
+    // stream and writes "((<tuple type name>){<arg0>, <arg1>, ...})"
+    #define BOOST_COMPUTE_MAKE_TUPLE_APPLY(z, n, unused) \
+    template<class Context, BOOST_PP_ENUM_PARAMS(n, class Arg)> \
+    static void apply(Context &ctx, BOOST_PP_REPEAT(n, BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_APPLY_ARG, ~)) \
+    { \
+        BOOST_PP_REPEAT(n, BOOST_COMPUTE_MAKE_TUPLE_GET_ARG_TYPE, ~) \
+        typedef typename boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> tuple_type; \
+        ctx.stream.template inject_type<tuple_type>(); \
+        ctx.stream << "((" << type_name<tuple_type>() << "){"; \
+        BOOST_PP_REPEAT(n, BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_APPLY_EVAL_ARG, ~) \
+        ctx.stream << "})"; \
+    }
+
+    BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_MAKE_TUPLE_APPLY, ~)
+
+    // undefine every helper defined above; the last name must match the
+    // BOOST_COMPUTE_MAKE_TUPLE_APPLY definition or the macro leaks out of
+    // this header
+    #undef BOOST_COMPUTE_MAKE_TUPLE_GET_ARG_TYPE
+    #undef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_APPLY_ARG
+    #undef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_APPLY_EVAL_ARG
+    #undef BOOST_COMPUTE_MAKE_TUPLE_APPLY
+};
+
+} // end detail namespace
+
+// one "const Arg<n> &arg<n>" function parameter, comma-separated
+#define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_ARG(z, n, unused) \
+    BOOST_PP_COMMA_IF(n) BOOST_PP_CAT(const Arg, n) BOOST_PP_CAT(&arg, n)
+
+// one "const Arg<n> &" type, comma-separated
+#define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_ARG_TYPE(z, n, unused) \
+    BOOST_PP_COMMA_IF(n) BOOST_PP_CAT(const Arg, n) &
+
+// one "::boost::ref(arg<n>)" call argument, comma-separated
+#define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_REF_ARG(z, n, unused) \
+    BOOST_PP_COMMA_IF(n) ::boost::ref(BOOST_PP_CAT(arg, n))
+
+// make_tuple(arg0, ..., arg(n-1)): builds a proto function expression whose
+// child 0 is a make_tuple_func and whose remaining children reference the
+// arguments
+#define BOOST_COMPUTE_LAMBDA_MAKE_TUPLE(z, n, unused) \
+template<BOOST_PP_ENUM_PARAMS(n, class Arg)> \
+inline typename proto::result_of::make_expr< \
+    proto::tag::function, \
+    detail::make_tuple_func, \
+    BOOST_PP_REPEAT(n, BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_ARG_TYPE, ~) \
+>::type \
+make_tuple(BOOST_PP_REPEAT(n, BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_ARG, ~)) \
+{ \
+    return proto::make_expr<proto::tag::function>( \
+        detail::make_tuple_func(), \
+        BOOST_PP_REPEAT(n, BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_REF_ARG, ~) \
+    ); \
+}
+
+BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_LAMBDA_MAKE_TUPLE, ~)
+
+#undef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_ARG
+#undef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_ARG_TYPE
+#undef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_REF_ARG
+#undef BOOST_COMPUTE_LAMBDA_MAKE_TUPLE
+
+} // end lambda namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_LAMBDA_MAKE_TUPLE_HPP
diff --git a/boost/compute/lambda/placeholder.hpp b/boost/compute/lambda/placeholder.hpp
new file mode 100644
index 0000000000..4774b1b4f3
--- /dev/null
+++ b/boost/compute/lambda/placeholder.hpp
@@ -0,0 +1,28 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_LAMBDA_PLACEHOLDER_HPP
+#define BOOST_COMPUTE_LAMBDA_PLACEHOLDER_HPP
+
+namespace boost {
+namespace compute {
+namespace lambda {
+
+// lambda placeholder type
+template<int I>
+struct placeholder
+{
+};
+
+} // end lambda namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_LAMBDA_PLACEHOLDER_HPP
diff --git a/boost/compute/lambda/placeholders.hpp b/boost/compute/lambda/placeholders.hpp
new file mode 100644
index 0000000000..5c3fe531d1
--- /dev/null
+++ b/boost/compute/lambda/placeholders.hpp
@@ -0,0 +1,93 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_LAMBDA_PLACEHOLDERS_HPP
+#define BOOST_COMPUTE_LAMBDA_PLACEHOLDERS_HPP
+
+#include <boost/mpl/has_xxx.hpp>
+
+#include <boost/compute/lambda/context.hpp>
+#include <boost/compute/lambda/result_of.hpp>
+
+namespace boost {
+namespace compute {
+namespace lambda {
+
+namespace mpl = boost::mpl;
+namespace proto = boost::proto;
+
+// lambda placeholders
+//
+// the names are one-based while the placeholder index is zero-based:
+// _1 wraps placeholder<0>, _2 wraps placeholder<1>, _3 wraps placeholder<2>
+expression<proto::terminal<placeholder<0> >::type> const _1;
+expression<proto::terminal<placeholder<1> >::type> const _2;
+expression<proto::terminal<placeholder<2> >::type> const _3;
+
+namespace detail {
+
+// defines has_result_type<T>, used by terminal_type below
+BOOST_MPL_HAS_XXX_TRAIT_DEF(result_type)
+
+// selects the type a placeholder argument evaluates to
+template<class T, bool HasResultType>
+struct terminal_type_impl;
+
+// T declares a nested result_type: use it
+template<class T>
+struct terminal_type_impl<T, true>
+{
+    typedef typename T::result_type type;
+};
+
+// T has no nested result_type: use T itself
+template<class T>
+struct terminal_type_impl<T, false>
+{
+    typedef T type;
+};
+
+// T::result_type when T declares one, otherwise T
+template<class T>
+struct terminal_type
+{
+    typedef typename terminal_type_impl<T, has_result_type<T>::value>::type type;
+};
+
+} // end detail namespace
+
+// result_of placeholders
+//
+// the result type of placeholder<I> is element I of the Args tuple, passed
+// through detail::terminal_type
+template<class Args>
+struct result_of<expression<proto::terminal<placeholder<0> >::type>, Args, proto::tag::terminal>
+{
+    typedef typename boost::tuples::element<0, Args>::type arg_type;
+
+    typedef typename detail::terminal_type<arg_type>::type type;
+};
+
+template<class Args>
+struct result_of<expression<proto::terminal<placeholder<1> >::type>, Args, proto::tag::terminal>
+{
+    typedef typename boost::tuples::element<1, Args>::type arg_type;
+
+    typedef typename detail::terminal_type<arg_type>::type type;
+};
+
+template<class Args>
+struct result_of<expression<proto::terminal<placeholder<2> >::type>, Args, proto::tag::terminal>
+{
+    typedef typename boost::tuples::element<2, Args>::type arg_type;
+
+    typedef typename detail::terminal_type<arg_type>::type type;
+};
+
+} // end lambda namespace
+
+// lift lambda placeholders up to the boost::compute namespace
+using lambda::_1;
+using lambda::_2;
+using lambda::_3;
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_LAMBDA_PLACEHOLDERS_HPP
diff --git a/boost/compute/lambda/result_of.hpp b/boost/compute/lambda/result_of.hpp
new file mode 100644
index 0000000000..baae87f05c
--- /dev/null
+++ b/boost/compute/lambda/result_of.hpp
@@ -0,0 +1,113 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_LAMBDA_RESULT_OF_HPP
+#define BOOST_COMPUTE_LAMBDA_RESULT_OF_HPP
+
+#include <boost/mpl/vector.hpp>
+#include <boost/proto/proto.hpp>
+
+#include <boost/compute/type_traits/common_type.hpp>
+
+namespace boost {
+namespace compute {
+namespace lambda {
+
+namespace mpl = boost::mpl;
+namespace proto = boost::proto;
+
+// meta-function returning the result type of a lambda expression
+//
+// dispatches on the proto tag of Expr; the primary template is empty, so an
+// expression whose tag has no specialization below has no nested ::type
+template<class Expr,
+         class Args = void,
+         class Tags = typename proto::tag_of<Expr>::type>
+struct result_of
+{
+};
+
+// terminals: the type of the value held by the terminal
+template<class Expr, class Args>
+struct result_of<Expr, Args, proto::tag::terminal>
+{
+    typedef typename proto::result_of::value<Expr>::type type;
+};
+
+// binary operators: boost::common_type of the result types of both children
+#define BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(tag) \
+    template<class Expr, class Args> \
+    struct result_of<Expr, Args, tag> \
+    { \
+        typedef typename proto::result_of::child_c<Expr, 0>::type left; \
+        typedef typename proto::result_of::child_c<Expr, 1>::type right; \
+        \
+        typedef typename boost::common_type< \
+            typename ::boost::compute::lambda::result_of< \
+                         left, \
+                         Args, \
+                         typename proto::tag_of<left>::type>::type, \
+            typename ::boost::compute::lambda::result_of< \
+                         right, \
+                         Args, \
+                         typename proto::tag_of<right>::type>::type \
+         >::type type; \
+    };
+
+BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::plus)
+BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::minus)
+BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::multiplies)
+BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::divides)
+BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::modulus)
+BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::bitwise_and)
+BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::bitwise_or)
+BOOST_COMPUTE_LAMBDA_RESULT_OF_BINARY_OPERATOR(proto::tag::bitwise_xor)
+
+// comparison operators (and logical and/or): the result is always bool
+#define BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(tag) \
+    template<class Expr, class Args> \
+    struct result_of<Expr, Args, tag> \
+    { \
+        typedef bool type; \
+    };
+
+BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::less)
+BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::greater)
+BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::less_equal)
+BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::greater_equal)
+BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::equal_to)
+BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::not_equal_to)
+BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::logical_and)
+BOOST_COMPUTE_LAMBDA_RESULT_OF_COMPARISON_OPERATOR(proto::tag::logical_or)
+
+// assignment operator: the result type of the right-hand child
+template<class Expr, class Args>
+struct result_of<Expr, Args, proto::tag::assign>
+{
+    typedef typename proto::result_of::child_c<Expr, 0>::type left;
+    typedef typename proto::result_of::child_c<Expr, 1>::type right;
+
+    typedef typename ::boost::compute::lambda::result_of<
+        right, Args, typename proto::tag_of<right>::type
+    >::type type;
+};
+
+// functions: delegates to the nested lambda_result<Expr, Args> template of
+// the function object stored in child 0 of the call expression
+template<class Expr, class Args>
+struct result_of<Expr, Args, proto::tag::function>
+{
+    typedef typename proto::result_of::child_c<Expr, 0>::type func_expr;
+    typedef typename proto::result_of::value<func_expr>::type func;
+
+    typedef typename func::template lambda_result<Expr, Args>::type type;
+};
+
+} // end lambda namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_LAMBDA_RESULT_OF_HPP
diff --git a/boost/compute/memory.hpp b/boost/compute/memory.hpp
new file mode 100644
index 0000000000..b4dccf4890
--- /dev/null
+++ b/boost/compute/memory.hpp
@@ -0,0 +1,21 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_MEMORY_HPP
+#define BOOST_COMPUTE_MEMORY_HPP
+
+/// \file
+///
+/// Meta-header to include all Boost.Compute memory headers.
+
+#include <boost/compute/memory/local_buffer.hpp>
+#include <boost/compute/memory/svm_ptr.hpp>
+
+#endif // BOOST_COMPUTE_MEMORY_HPP
diff --git a/boost/compute/memory/local_buffer.hpp b/boost/compute/memory/local_buffer.hpp
new file mode 100644
index 0000000000..aaf631317a
--- /dev/null
+++ b/boost/compute/memory/local_buffer.hpp
@@ -0,0 +1,91 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_MEMORY_LOCAL_BUFFER_HPP
+#define BOOST_COMPUTE_MEMORY_LOCAL_BUFFER_HPP
+
+#include <boost/compute/cl.hpp>
+#include <boost/compute/kernel.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class local_buffer
+/// \brief Represents a local memory buffer on the device.
+///
+/// A local_buffer only records an element count; it is a description of a
+/// block of local memory on a compute device rather than an allocation.
+///
+/// Its usual purpose is to be passed as a local memory argument to a compute
+/// kernel:
+/// \code
+/// // set argument to a local buffer with storage for 32 float's
+/// kernel.set_arg(0, local_buffer<float>(32));
+/// \endcode
+///
+/// \see buffer, kernel
+template<class T>
+class local_buffer
+{
+public:
+    /// Creates a local buffer object describing \p size elements of type T.
+    local_buffer(const size_t size)
+        : m_size(size)
+    {
+    }
+
+    /// Creates a local buffer object with the same element count as \p other.
+    local_buffer(const local_buffer &other)
+        : m_size(other.m_size)
+    {
+    }
+
+    /// Copies the element count of \p other to \c *this.
+    local_buffer& operator=(const local_buffer &other)
+    {
+        // copying the count onto itself is harmless, so self-assignment
+        // needs no special case
+        m_size = other.m_size;
+
+        return *this;
+    }
+
+    /// Destroys the local buffer object.
+    ~local_buffer()
+    {
+    }
+
+    /// Returns the number of elements (not bytes) in the local buffer.
+    size_t size() const
+    {
+        return m_size;
+    }
+
+private:
+    size_t m_size;
+};
+
+namespace detail {
+
+// set_kernel_arg specialization for local_buffer<T>
+//
+// sets the argument by size only: the byte count of the buffer together with
+// a null value pointer
+template<class T>
+struct set_kernel_arg<local_buffer<T> >
+{
+    void operator()(kernel &kernel_, size_t index, const local_buffer<T> &buffer)
+    {
+        const size_t byte_count = buffer.size() * sizeof(T);
+
+        kernel_.set_arg(index, byte_count, 0);
+    }
+};
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_MEMORY_LOCAL_BUFFER_HPP
diff --git a/boost/compute/memory/svm_ptr.hpp b/boost/compute/memory/svm_ptr.hpp
new file mode 100644
index 0000000000..2dbcb8f635
--- /dev/null
+++ b/boost/compute/memory/svm_ptr.hpp
@@ -0,0 +1,81 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_MEMORY_SVM_PTR_HPP
+#define BOOST_COMPUTE_MEMORY_SVM_PTR_HPP
+
+#include <cstddef>
+#include <iterator>
+
+#include <boost/compute/cl.hpp>
+#include <boost/compute/type_traits/is_device_iterator.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class svm_ptr
+/// \brief A typed wrapper around a raw pointer that is tagged as a device
+///        iterator.
+///
+/// The wrapper stores a \c T* and never owns or frees what it points to.
+/// NOTE(review): going by the name, the pointer is presumably an OpenCL
+/// shared virtual memory address - confirm against the allocation code.
+template<class T>
+class svm_ptr
+{
+public:
+    typedef T value_type;
+    typedef std::ptrdiff_t difference_type;
+    typedef T* pointer;
+    typedef T& reference;
+    typedef std::random_access_iterator_tag iterator_category;
+
+    /// Creates a null pointer.
+    svm_ptr()
+        : m_ptr(0)
+    {
+    }
+
+    /// Creates a pointer to \p ptr, reinterpreted as a \c T*.
+    explicit svm_ptr(void *ptr)
+        : m_ptr(static_cast<T*>(ptr))
+    {
+    }
+
+    /// Creates a pointer to the same address as \p other.
+    svm_ptr(const svm_ptr<T> &other)
+        : m_ptr(other.m_ptr)
+    {
+    }
+
+    /// Makes \c *this point to the same address as \p other.
+    svm_ptr& operator=(const svm_ptr<T> &other)
+    {
+        m_ptr = other.m_ptr;
+        return *this;
+    }
+
+    /// Destroys the pointer object; the pointed-to memory is left untouched.
+    ~svm_ptr()
+    {
+    }
+
+    /// Returns the stored address as an untyped pointer.
+    void* get() const
+    {
+        return m_ptr;
+    }
+
+    /// Returns a pointer advanced by \p n elements. Does not modify
+    /// \c *this, so it is callable on const objects.
+    svm_ptr<T> operator+(difference_type n) const
+    {
+        return svm_ptr<T>(m_ptr + n);
+    }
+
+    /// Returns the distance in elements from \p other to \c *this. Does not
+    /// modify \c *this, so it is callable on const objects.
+    difference_type operator-(svm_ptr<T> other) const
+    {
+        return m_ptr - other.m_ptr;
+    }
+
+private:
+    T *m_ptr;
+};
+
+/// \internal_ (is_device_iterator specialization for svm_ptr)
+template<class T>
+struct is_device_iterator<svm_ptr<T> > : boost::true_type {};
+
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_MEMORY_SVM_PTR_HPP
diff --git a/boost/compute/memory_object.hpp b/boost/compute/memory_object.hpp
new file mode 100644
index 0000000000..14c4cf4c7e
--- /dev/null
+++ b/boost/compute/memory_object.hpp
@@ -0,0 +1,264 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_MEMORY_OBJECT_HPP
+#define BOOST_COMPUTE_MEMORY_OBJECT_HPP
+
+#include <boost/compute/config.hpp>
+#include <boost/compute/context.hpp>
+#include <boost/compute/kernel.hpp>
+#include <boost/compute/detail/get_object_info.hpp>
+#include <boost/compute/detail/assert_cl_success.hpp>
+
+namespace boost {
+namespace compute {
+
+/// \class memory_object
+/// \brief Base-class for memory objects.
+///
+/// The memory_object class is the base-class for memory objects on
+/// compute devices. It holds one \c cl_mem handle and keeps its OpenCL
+/// reference count in step with copies, moves and destruction.
+///
+/// \see buffer, vector
+class memory_object
+{
+public:
+    /// Flags for the creation of memory objects.
+    enum mem_flags {
+        read_write = CL_MEM_READ_WRITE,
+        read_only = CL_MEM_READ_ONLY,
+        write_only = CL_MEM_WRITE_ONLY,
+        use_host_ptr = CL_MEM_USE_HOST_PTR,
+        alloc_host_ptr = CL_MEM_ALLOC_HOST_PTR,
+        copy_host_ptr = CL_MEM_COPY_HOST_PTR
+        #ifdef CL_VERSION_1_2
+        ,
+        host_write_only = CL_MEM_HOST_WRITE_ONLY,
+        host_read_only = CL_MEM_HOST_READ_ONLY,
+        host_no_access = CL_MEM_HOST_NO_ACCESS
+        #endif
+    };
+
+    /// Symbolic names for the OpenCL address spaces.
+    enum address_space {
+        global_memory,
+        local_memory,
+        private_memory,
+        constant_memory
+    };
+
+    /// Returns the underlying OpenCL memory object.
+    ///
+    /// The handle is returned by non-const reference even from a const
+    /// object.
+    cl_mem& get() const
+    {
+        return const_cast<cl_mem &>(m_mem);
+    }
+
+    /// Returns the size of the memory object in bytes.
+    size_t get_memory_size() const
+    {
+        return get_memory_info<size_t>(CL_MEM_SIZE);
+    }
+
+    /// Returns the type for the memory object.
+    cl_mem_object_type get_memory_type() const
+    {
+        return get_memory_info<cl_mem_object_type>(CL_MEM_TYPE);
+    }
+
+    /// Returns the flags for the memory object.
+    cl_mem_flags get_memory_flags() const
+    {
+        return get_memory_info<cl_mem_flags>(CL_MEM_FLAGS);
+    }
+
+    /// Returns the context for the memory object.
+    context get_context() const
+    {
+        return context(get_memory_info<cl_context>(CL_MEM_CONTEXT));
+    }
+
+    /// Returns the host pointer associated with the memory object.
+    void* get_host_ptr() const
+    {
+        return get_memory_info<void *>(CL_MEM_HOST_PTR);
+    }
+
+    /// Returns the reference count for the memory object.
+    uint_ reference_count() const
+    {
+        return get_memory_info<uint_>(CL_MEM_REFERENCE_COUNT);
+    }
+
+    /// Returns information about the memory object.
+    ///
+    /// \see_opencl_ref{clGetMemObjectInfo}
+    template<class T>
+    T get_memory_info(cl_mem_info info) const
+    {
+        return detail::get_object_info<T>(clGetMemObjectInfo, m_mem, info);
+    }
+
+    #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+    /// Registers a function to be called when the memory object is deleted
+    /// and its resources freed.
+    ///
+    /// Throws \c opencl_error if the registration fails.
+    ///
+    /// \see_opencl_ref{clSetMemObjectDestructorCallback}
+    ///
+    /// \opencl_version_warning{1,1}
+    void set_destructor_callback(void (BOOST_COMPUTE_CL_CALLBACK *callback)(
+                                     cl_mem memobj, void *user_data
+                                 ),
+                                 void *user_data = 0)
+    {
+        cl_int ret = clSetMemObjectDestructorCallback(m_mem, callback, user_data);
+        if(ret != CL_SUCCESS){
+            BOOST_THROW_EXCEPTION(opencl_error(ret));
+        }
+    }
+
+    /// Registers a function to be called when the memory object is deleted
+    /// and its resources freed.
+    ///
+    /// The function specified by \p callback must be invokable with zero
+    /// arguments (e.g. \c callback()).
+    ///
+    /// Throws \c opencl_error if the registration fails.
+    ///
+    /// \opencl_version_warning{1,1}
+    template<class Function>
+    void set_destructor_callback(Function callback)
+    {
+        // heap-allocated so that it outlives this call; on success it is
+        // deleted by destructor_callback_invoker after it has been run
+        boost::function<void()> *user_callback =
+            new boost::function<void()>(callback);
+
+        cl_int ret = clSetMemObjectDestructorCallback(
+            m_mem, destructor_callback_invoker, user_callback
+        );
+        if(ret != CL_SUCCESS){
+            // registration failed, so the invoker will never run: free the
+            // wrapper here instead of leaking it
+            delete user_callback;
+            BOOST_THROW_EXCEPTION(opencl_error(ret));
+        }
+    }
+    #endif // CL_VERSION_1_1
+
+    /// Returns \c true if the memory object is the same as \p other.
+    bool operator==(const memory_object &other) const
+    {
+        return m_mem == other.m_mem;
+    }
+
+    /// Returns \c true if the memory object is different from \p other.
+    bool operator!=(const memory_object &other) const
+    {
+        return m_mem != other.m_mem;
+    }
+
+private:
+    #ifdef CL_VERSION_1_1
+    /// \internal_
+    ///
+    /// Trampoline registered by the templated set_destructor_callback():
+    /// \p user_data is the heap-allocated boost::function, which is invoked
+    /// once and then deleted.
+    static void BOOST_COMPUTE_CL_CALLBACK
+    destructor_callback_invoker(cl_mem, void *user_data)
+    {
+        boost::function<void()> *callback =
+            static_cast<boost::function<void()> *>(user_data);
+
+        (*callback)();
+
+        delete callback;
+    }
+    #endif // CL_VERSION_1_1
+
+protected:
+    /// \internal_
+    memory_object()
+        : m_mem(0)
+    {
+    }
+
+    /// \internal_
+    ///
+    /// Wraps \p mem; the reference count is incremented only when \p retain
+    /// is \c true and \p mem is non-null.
+    explicit memory_object(cl_mem mem, bool retain = true)
+        : m_mem(mem)
+    {
+        if(m_mem && retain){
+            clRetainMemObject(m_mem);
+        }
+    }
+
+    /// \internal_
+    memory_object(const memory_object &other)
+        : m_mem(other.m_mem)
+    {
+        if(m_mem){
+            clRetainMemObject(m_mem);
+        }
+    }
+
+    /// \internal_
+    memory_object& operator=(const memory_object &other)
+    {
+        if(this != &other){
+            if(m_mem){
+                clReleaseMemObject(m_mem);
+            }
+
+            m_mem = other.m_mem;
+
+            if(m_mem){
+                clRetainMemObject(m_mem);
+            }
+        }
+
+        return *this;
+    }
+
+    #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
+    /// \internal_
+    memory_object(memory_object&& other) BOOST_NOEXCEPT
+        : m_mem(other.m_mem)
+    {
+        other.m_mem = 0;
+    }
+
+    /// \internal_
+    memory_object& operator=(memory_object&& other) BOOST_NOEXCEPT
+    {
+        // guard against self-move, matching the copy-assignment operator:
+        // without it the handle would be released and then reset to null
+        if(this != &other){
+            if(m_mem){
+                clReleaseMemObject(m_mem);
+            }
+
+            m_mem = other.m_mem;
+            other.m_mem = 0;
+        }
+
+        return *this;
+    }
+    #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
+
+    /// \internal_
+    ~memory_object()
+    {
+        if(m_mem){
+            BOOST_COMPUTE_ASSERT_CL_SUCCESS(
+                clReleaseMemObject(m_mem)
+            );
+        }
+    }
+
+protected:
+    cl_mem m_mem;
+};
+
+namespace detail {
+
+// set_kernel_arg specialization for memory_object
+template<>
+struct set_kernel_arg<memory_object>
+{
+    void operator()(kernel &kernel_, size_t index, const memory_object &mem)
+    {
+        kernel_.set_arg(index, mem.get());
+    }
+};
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_MEMORY_OBJECT_HPP
diff --git 
a/boost/compute/pipe.hpp b/boost/compute/pipe.hpp
new file mode 100644
index 0000000000..944674e622
--- /dev/null
+++ b/boost/compute/pipe.hpp
@@ -0,0 +1,154 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_PIPE_HPP
+#define BOOST_COMPUTE_PIPE_HPP
+
+#include <boost/compute/cl.hpp>
+#include <boost/compute/context.hpp>
+#include <boost/compute/memory_object.hpp>
+#include <boost/compute/exception/opencl_error.hpp>
+#include <boost/compute/detail/get_object_info.hpp>
+
+// pipe objects require opencl 2.0
+#if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+
+namespace boost {
+namespace compute {
+
+/// \class pipe
+/// \brief A FIFO data pipe
+///
+/// \opencl_version_warning{2,0}
+///
+/// \see memory_object
+class pipe : public memory_object
+{
+public:
+    /// Creates a null pipe object.
+    pipe()
+        : memory_object()
+    {
+    }
+
+    /// Wraps the existing handle \p mem. The reference count of \p mem is
+    /// incremented when \p retain is \c true.
+    explicit pipe(cl_mem mem, bool retain = true)
+        : memory_object(mem, retain)
+    {
+    }
+
+    /// Creates a new pipe in \p context holding at most \p pipe_max_packets
+    /// packets of \p pipe_packet_size each.
+    ///
+    /// Throws \c opencl_error if no pipe could be created.
+    pipe(const context &context,
+         uint_ pipe_packet_size,
+         uint_ pipe_max_packets,
+         cl_mem_flags flags = read_write,
+         const cl_pipe_properties *properties = 0)
+    {
+        cl_int status = 0;
+        m_mem = clCreatePipe(
+            context, flags, pipe_packet_size, pipe_max_packets, properties, &status
+        );
+
+        if(m_mem == 0){
+            BOOST_THROW_EXCEPTION(opencl_error(status));
+        }
+    }
+
+    /// Creates a new pipe object sharing the handle of \p other.
+    pipe(const pipe &other)
+        : memory_object(other)
+    {
+    }
+
+    /// Makes \c *this share the handle of \p other.
+    pipe& operator=(const pipe &other)
+    {
+        if(this == &other){
+            return *this;
+        }
+
+        memory_object::operator=(other);
+        return *this;
+    }
+
+    #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
+    /// Move-constructs a new pipe object, taking the handle from \p other.
+    pipe(pipe&& other) BOOST_NOEXCEPT
+        : memory_object(std::move(other))
+    {
+    }
+
+    /// Move-assigns the handle of \p other to \c *this.
+    pipe& operator=(pipe&& other) BOOST_NOEXCEPT
+    {
+        memory_object::operator=(std::move(other));
+        return *this;
+    }
+    #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
+
+    /// Destroys the pipe object.
+    ~pipe()
+    {
+    }
+
+    /// Returns the packet size (\c CL_PIPE_PACKET_SIZE).
+    uint_ packet_size() const
+    {
+        return this->get_info<uint_>(CL_PIPE_PACKET_SIZE);
+    }
+
+    /// Returns the max number of packets (\c CL_PIPE_MAX_PACKETS).
+    uint_ max_packets() const
+    {
+        return this->get_info<uint_>(CL_PIPE_MAX_PACKETS);
+    }
+
+    /// Queries \p info for the pipe and returns it as a \c T.
+    ///
+    /// \see_opencl2_ref{clGetPipeInfo}
+    template<class T>
+    T get_info(cl_pipe_info info) const
+    {
+        return detail::get_object_info<T>(clGetPipeInfo, m_mem, info);
+    }
+
+    /// \overload
+    template<int Enum>
+    typename detail::get_object_info_type<pipe, Enum>::type get_info() const;
+};
+
+/// \internal_ define get_info() specializations for pipe
+BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(pipe,
+    ((cl_uint, CL_PIPE_PACKET_SIZE))
+    ((cl_uint, CL_PIPE_MAX_PACKETS))
+)
+
+namespace detail {
+
+// set_kernel_arg specialization for pipe: passes the pipe's cl_mem handle
+template<>
+struct set_kernel_arg<pipe>
+{
+    void operator()(kernel &kernel_, size_t index, const pipe &pipe_)
+    {
+        cl_mem &handle = pipe_.get();
+
+        kernel_.set_arg(index, handle);
+    }
+};
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // CL_VERSION_2_0
+
+#endif // BOOST_COMPUTE_PIPE_HPP
diff --git a/boost/compute/platform.hpp b/boost/compute/platform.hpp
new file mode 100644
index 0000000000..65fda84e11
--- /dev/null
+++ 
b/boost/compute/platform.hpp @@ -0,0 +1,235 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_PLATFORM_HPP +#define BOOST_COMPUTE_PLATFORM_HPP + +#include <algorithm> +#include <string> +#include <vector> + +#include <boost/algorithm/string/split.hpp> +#include <boost/algorithm/string/classification.hpp> + +#include <boost/compute/cl.hpp> +#include <boost/compute/device.hpp> +#include <boost/compute/detail/get_object_info.hpp> + +namespace boost { +namespace compute { + +/// \class platform +/// \brief A compute platform. +/// +/// The platform class provides an interface to an OpenCL platform. +/// +/// To obtain a list of all platforms on the system use the +/// system::platforms() method. +/// +/// \see device, context +class platform +{ +public: + /// Creates a new platform object for \p id. + explicit platform(cl_platform_id id) + : m_platform(id) + { + } + + /// Creates a new platform as a copy of \p other. + platform(const platform &other) + : m_platform(other.m_platform) + { + } + + /// Copies the platform id from \p other. + platform& operator=(const platform &other) + { + if(this != &other){ + m_platform = other.m_platform; + } + + return *this; + } + + /// Destroys the platform object. + ~platform() + { + } + + /// Returns the ID of the platform. + cl_platform_id id() const + { + return m_platform; + } + + /// Returns the name of the platform. + std::string name() const + { + return get_info<std::string>(CL_PLATFORM_NAME); + } + + /// Returns the name of the vendor for the platform. 
+ std::string vendor() const + { + return get_info<std::string>(CL_PLATFORM_VENDOR); + } + + /// Returns the profile string for the platform. + std::string profile() const + { + return get_info<std::string>(CL_PLATFORM_PROFILE); + } + + /// Returns the version string for the platform. + std::string version() const + { + return get_info<std::string>(CL_PLATFORM_VERSION); + } + + /// Returns a list of extensions supported by the platform. + std::vector<std::string> extensions() const + { + std::string extensions_string = + get_info<std::string>(CL_PLATFORM_EXTENSIONS); + std::vector<std::string> extensions_vector; + boost::split(extensions_vector, + extensions_string, + boost::is_any_of("\t "), + boost::token_compress_on); + return extensions_vector; + } + + /// Returns \c true if the platform supports the extension with + /// \p name. + bool supports_extension(const std::string &name) const + { + const std::vector<std::string> extensions = this->extensions(); + + return std::find( + extensions.begin(), extensions.end(), name) != extensions.end(); + } + + /// Returns a list of devices on the platform. + std::vector<device> devices(cl_device_type type = CL_DEVICE_TYPE_ALL) const + { + size_t count = device_count(type); + if(count == 0){ + // no devices for this platform + return std::vector<device>(); + } + + std::vector<cl_device_id> device_ids(count); + cl_int ret = clGetDeviceIDs(m_platform, + type, + static_cast<cl_uint>(count), + &device_ids[0], + 0); + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + std::vector<device> devices; + for(cl_uint i = 0; i < count; i++){ + devices.push_back(device(device_ids[i])); + } + + return devices; + } + + /// Returns the number of devices on the platform. 
+ size_t device_count(cl_device_type type = CL_DEVICE_TYPE_ALL) const + { + cl_uint count = 0; + cl_int ret = clGetDeviceIDs(m_platform, type, 0, 0, &count); + if(ret != CL_SUCCESS){ + if(ret == CL_DEVICE_NOT_FOUND){ + // no devices for this platform + return 0; + } + else { + // something else went wrong + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + } + + return count; + } + + /// Returns information about the platform. + /// + /// \see_opencl_ref{clGetPlatformInfo} + template<class T> + T get_info(cl_platform_info info) const + { + return detail::get_object_info<T>(clGetPlatformInfo, m_platform, info); + } + + /// \overload + template<int Enum> + typename detail::get_object_info_type<platform, Enum>::type + get_info() const; + + /// Returns the address of the \p function_name extension + /// function. Returns \c 0 if \p function_name is invalid. + void* get_extension_function_address(const char *function_name) const + { + #ifdef CL_VERSION_1_2 + return clGetExtensionFunctionAddressForPlatform(m_platform, + function_name); + #else + return clGetExtensionFunctionAddress(function_name); + #endif + } + + /// Requests that the platform unload any compiler resources. + void unload_compiler() + { + #ifdef CL_VERSION_1_2 + clUnloadPlatformCompiler(m_platform); + #else + clUnloadCompiler(); + #endif + } + + /// Returns \c true if the platform is the same at \p other. + bool operator==(const platform &other) const + { + return m_platform == other.m_platform; + } + + /// Returns \c true if the platform is different from \p other. 
+ bool operator!=(const platform &other) const + { + return m_platform != other.m_platform; + } + +private: + cl_platform_id m_platform; +}; + +/// \internal_ define get_info() specializations for platform +BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(platform, + ((std::string, CL_PLATFORM_PROFILE)) + ((std::string, CL_PLATFORM_VERSION)) + ((std::string, CL_PLATFORM_NAME)) + ((std::string, CL_PLATFORM_VENDOR)) + ((std::string, CL_PLATFORM_EXTENSIONS)) +) + +inline boost::compute::platform device::platform() const +{ + return boost::compute::platform(get_info<CL_DEVICE_PLATFORM>()); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_PLATFORM_HPP diff --git a/boost/compute/program.hpp b/boost/compute/program.hpp new file mode 100644 index 0000000000..7573aa02e6 --- /dev/null +++ b/boost/compute/program.hpp @@ -0,0 +1,650 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_PROGRAM_HPP +#define BOOST_COMPUTE_PROGRAM_HPP + +#include <string> +#include <vector> +#include <fstream> +#include <streambuf> + +#ifdef BOOST_COMPUTE_DEBUG_KERNEL_COMPILATION +#include <iostream> +#endif + +#include <boost/compute/config.hpp> +#include <boost/compute/context.hpp> +#include <boost/compute/exception.hpp> +#include <boost/compute/detail/assert_cl_success.hpp> + +#ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE +#include <sstream> +#include <boost/optional.hpp> +#include <boost/compute/platform.hpp> +#include <boost/compute/detail/getenv.hpp> +#include <boost/compute/detail/path.hpp> +#include <boost/compute/detail/sha1.hpp> +#endif + +namespace boost { +namespace compute { + +class kernel; + +/// \class program +/// \brief A compute program. +/// +/// The program class represents an OpenCL program. +/// +/// Program objects are created with one of the static \c create_with_* +/// functions. For example, to create a program from a source string: +/// +/// \snippet test/test_program.cpp create_with_source +/// +/// And to create a program from a source file: +/// \code +/// boost::compute::program bar_program = +/// boost::compute::program::create_with_source_file("/path/to/bar.cl", context); +/// \endcode +/// +/// Once a program object has been succesfully created, it can be compiled +/// using the \c build() method: +/// \code +/// // build the program +/// foo_program.build(); +/// \endcode +/// +/// Once the program is built, \ref kernel objects can be created using the +/// \c create_kernel() method by passing their name: +/// \code +/// // create a kernel from the compiled program +/// boost::compute::kernel foo_kernel = foo_program.create_kernel("foo"); +/// \endcode +/// +/// \see kernel +class program +{ +public: + /// Creates a null program object. + program() + : m_program(0) + { + } + + /// Creates a program object for \p program. 
If \p retain is \c true, + /// the reference count for \p program will be incremented. + explicit program(cl_program program, bool retain = true) + : m_program(program) + { + if(m_program && retain){ + clRetainProgram(m_program); + } + } + + /// Creates a new program object as a copy of \p other. + program(const program &other) + : m_program(other.m_program) + { + if(m_program){ + clRetainProgram(m_program); + } + } + + /// Copies the program object from \p other to \c *this. + program& operator=(const program &other) + { + if(this != &other){ + if(m_program){ + clReleaseProgram(m_program); + } + + m_program = other.m_program; + + if(m_program){ + clRetainProgram(m_program); + } + } + + return *this; + } + + #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES + /// Move-constructs a new program object from \p other. + program(program&& other) BOOST_NOEXCEPT + : m_program(other.m_program) + { + other.m_program = 0; + } + + /// Move-assigns the program from \p other to \c *this. + program& operator=(program&& other) BOOST_NOEXCEPT + { + if(m_program){ + clReleaseProgram(m_program); + } + + m_program = other.m_program; + other.m_program = 0; + + return *this; + } + #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES + + /// Destroys the program object. + ~program() + { + if(m_program){ + BOOST_COMPUTE_ASSERT_CL_SUCCESS( + clReleaseProgram(m_program) + ); + } + } + + /// Returns the underlying OpenCL program. + cl_program& get() const + { + return const_cast<cl_program &>(m_program); + } + + /// Returns the source code for the program. + std::string source() const + { + return get_info<std::string>(CL_PROGRAM_SOURCE); + } + + /// Returns the binary for the program. 
+ std::vector<unsigned char> binary() const + { + size_t binary_size = get_info<size_t>(CL_PROGRAM_BINARY_SIZES); + std::vector<unsigned char> binary(binary_size); + + unsigned char *binary_ptr = &binary[0]; + cl_int error = clGetProgramInfo(m_program, + CL_PROGRAM_BINARIES, + sizeof(unsigned char **), + &binary_ptr, + 0); + if(error != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(error)); + } + + return binary; + } + + std::vector<device> get_devices() const + { + std::vector<cl_device_id> device_ids = + get_info<std::vector<cl_device_id> >(CL_PROGRAM_DEVICES); + + std::vector<device> devices; + for(size_t i = 0; i < device_ids.size(); i++){ + devices.push_back(device(device_ids[i])); + } + + return devices; + } + + /// Returns the context for the program. + context get_context() const + { + return context(get_info<cl_context>(CL_PROGRAM_CONTEXT)); + } + + /// Returns information about the program. + /// + /// \see_opencl_ref{clGetProgramInfo} + template<class T> + T get_info(cl_program_info info) const + { + return detail::get_object_info<T>(clGetProgramInfo, m_program, info); + } + + /// \overload + template<int Enum> + typename detail::get_object_info_type<program, Enum>::type + get_info() const; + + /// Returns build information about the program. + /// + /// For example, this function can be used to retreive the options used + /// to build the program: + /// \code + /// std::string build_options = + /// program.get_build_info<std::string>(CL_PROGRAM_BUILD_OPTIONS); + /// \endcode + /// + /// \see_opencl_ref{clGetProgramInfo} + template<class T> + T get_build_info(cl_program_build_info info, const device &device) const + { + return detail::get_object_info<T>(clGetProgramBuildInfo, m_program, info, device.id()); + } + + /// Builds the program with \p options. + /// + /// If the program fails to compile, this function will throw an + /// opencl_error exception. 
+ /// \code + /// try { + /// // attempt to compile to program + /// program.build(); + /// } + /// catch(boost::compute::opencl_error &e){ + /// // program failed to compile, print out the build log + /// std::cout << program.build_log() << std::endl; + /// } + /// \endcode + /// + /// \see_opencl_ref{clBuildProgram} + void build(const std::string &options = std::string()) + { + const char *options_string = 0; + + if(!options.empty()){ + options_string = options.c_str(); + } + + cl_int ret = clBuildProgram(m_program, 0, 0, options_string, 0, 0); + + #ifdef BOOST_COMPUTE_DEBUG_KERNEL_COMPILATION + if(ret != CL_SUCCESS){ + // print the error, source code and build log + std::cerr << "Boost.Compute: " + << "kernel compilation failed (" << ret << ")\n" + << "--- source ---\n" + << source() + << "\n--- build log ---\n" + << build_log() + << std::endl; + } + #endif + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + } + + #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + /// Compiles the program with \p options. + /// + /// \opencl_version_warning{1,2} + /// + /// \see_opencl_ref{clCompileProgram} + void compile(const std::string &options = std::string()) + { + const char *options_string = 0; + + if(!options.empty()){ + options_string = options.c_str(); + } + + cl_int ret = clCompileProgram( + m_program, 0, 0, options_string, 0, 0, 0, 0, 0 + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + } + + /// Links the programs in \p programs with \p options in \p context. 
+ /// + /// \opencl_version_warning{1,2} + /// + /// \see_opencl_ref{clLinkProgram} + static program link(const std::vector<program> &programs, + const context &context, + const std::string &options = std::string()) + { + const char *options_string = 0; + + if(!options.empty()){ + options_string = options.c_str(); + } + + cl_int ret; + cl_program program_ = clLinkProgram( + context.get(), + 0, + 0, + options_string, + static_cast<uint_>(programs.size()), + reinterpret_cast<const cl_program*>(&programs[0]), + 0, + 0, + &ret + ); + + if(!program_){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return program(program_, false); + } + #endif // CL_VERSION_1_2 + + /// Returns the build log. + std::string build_log() const + { + return get_build_info<std::string>(CL_PROGRAM_BUILD_LOG, get_devices().front()); + } + + /// Creates and returns a new kernel object for \p name. + /// + /// For example, to create the \c "foo" kernel (after the program has been + /// created and built): + /// \code + /// boost::compute::kernel foo_kernel = foo_program.create_kernel("foo"); + /// \endcode + kernel create_kernel(const std::string &name) const; + + /// Returns \c true if the program is the same at \p other. + bool operator==(const program &other) const + { + return m_program == other.m_program; + } + + /// Returns \c true if the program is different from \p other. + bool operator!=(const program &other) const + { + return m_program != other.m_program; + } + + /// \internal_ + operator cl_program() const + { + return m_program; + } + + /// Creates a new program with \p source in \p context. 
+ /// + /// \see_opencl_ref{clCreateProgramWithSource} + static program create_with_source(const std::string &source, + const context &context) + { + const char *source_string = source.c_str(); + + cl_int error = 0; + cl_program program_ = clCreateProgramWithSource(context, + uint_(1), + &source_string, + 0, + &error); + if(!program_){ + BOOST_THROW_EXCEPTION(opencl_error(error)); + } + + return program(program_, false); + } + + /// Creates a new program with \p sources in \p context. + /// + /// \see_opencl_ref{clCreateProgramWithSource} + static program create_with_source(const std::vector<std::string> &sources, + const context &context) + { + std::vector<const char*> source_strings(sources.size()); + for(size_t i = 0; i < sources.size(); i++){ + source_strings[i] = sources[i].c_str(); + } + + cl_int error = 0; + cl_program program_ = clCreateProgramWithSource(context, + uint_(sources.size()), + &source_strings[0], + 0, + &error); + if(!program_){ + BOOST_THROW_EXCEPTION(opencl_error(error)); + } + + return program(program_, false); + } + + /// Creates a new program with \p file in \p context. + /// + /// \see_opencl_ref{clCreateProgramWithSource} + static program create_with_source_file(const std::string &file, + const context &context) + { + // open file stream + std::ifstream stream(file.c_str()); + + if(stream.fail()){ + BOOST_THROW_EXCEPTION(std::ios_base::failure("failed to create stream.")); + } + + // read source + std::string source( + (std::istreambuf_iterator<char>(stream)), + std::istreambuf_iterator<char>() + ); + + // create program + return create_with_source(source, context); + } + + /// Creates a new program with \p binary of \p binary_size in + /// \p context. 
+ /// + /// \see_opencl_ref{clCreateProgramWithBinary} + static program create_with_binary(const unsigned char *binary, + size_t binary_size, + const context &context) + { + const cl_device_id device = context.get_device().id(); + + cl_int error = 0; + cl_int binary_status = 0; + cl_program program_ = clCreateProgramWithBinary(context, + uint_(1), + &device, + &binary_size, + &binary, + &binary_status, + &error); + if(!program_){ + BOOST_THROW_EXCEPTION(opencl_error(error)); + } + if(binary_status != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(binary_status)); + } + + return program(program_, false); + } + + /// Creates a new program with \p binary in \p context. + /// + /// \see_opencl_ref{clCreateProgramWithBinary} + static program create_with_binary(const std::vector<unsigned char> &binary, + const context &context) + { + return create_with_binary(&binary[0], binary.size(), context); + } + + /// Creates a new program with \p file in \p context. + /// + /// \see_opencl_ref{clCreateProgramWithBinary} + static program create_with_binary_file(const std::string &file, + const context &context) + { + // open file stream + std::ifstream stream(file.c_str(), std::ios::in | std::ios::binary); + + // read binary + std::vector<unsigned char> binary( + (std::istreambuf_iterator<char>(stream)), + std::istreambuf_iterator<char>() + ); + + // create program + return create_with_binary(&binary[0], binary.size(), context); + } + + #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + /// Creates a new program with the built-in kernels listed in + /// \p kernel_names for \p devices in \p context. 
+ /// + /// \opencl_version_warning{1,2} + /// + /// \see_opencl_ref{clCreateProgramWithBuiltInKernels} + static program create_with_builtin_kernels(const context &context, + const std::vector<device> &devices, + const std::string &kernel_names) + { + cl_int error = 0; + + cl_program program_ = clCreateProgramWithBuiltInKernels( + context.get(), + static_cast<uint_>(devices.size()), + reinterpret_cast<const cl_device_id *>(&devices[0]), + kernel_names.c_str(), + &error + ); + + if(!program_){ + BOOST_THROW_EXCEPTION(opencl_error(error)); + } + + return program(program_, false); + } + #endif // CL_VERSION_1_2 + + /// Create a new program with \p source in \p context and builds it with \p options. + /** + * In case BOOST_COMPUTE_USE_OFFLINE_CACHE macro is defined, + * the compiled binary is stored for reuse in the offline cache located in + * $HOME/.boost_compute on UNIX-like systems and in %APPDATA%/boost_compute + * on Windows. + */ + static program build_with_source( + const std::string &source, + const context &context, + const std::string &options = std::string() + ) + { +#ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE + // Get hash string for the kernel. + device d = context.get_device(); + platform p = d.platform(); + + detail::sha1 hash; + hash.process( p.name() ) + .process( p.version() ) + .process( d.name() ) + .process( options ) + .process( source ) + ; + + // Try to get cached program binaries: + try { + boost::optional<program> prog = load_program_binary(hash, context); + + if (prog) { + prog->build(options); + return *prog; + } + } catch (...) { + // Something bad happened. Fallback to normal compilation. + } + + // Cache is apparently not available. Just compile the sources. 
+#endif + const char *source_string = source.c_str(); + + cl_int error = 0; + cl_program program_ = clCreateProgramWithSource(context, + uint_(1), + &source_string, + 0, + &error); + if(!program_){ + BOOST_THROW_EXCEPTION(opencl_error(error)); + } + + program prog(program_, false); + prog.build(options); + +#ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE + // Save program binaries for future reuse. + save_program_binary(hash, prog); +#endif + + return prog; + } + +private: +#ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE + // Saves program binaries for future reuse. + static void save_program_binary(const std::string &hash, const program &prog) + { + std::string fname = detail::program_binary_path(hash, true) + "kernel"; + std::ofstream bfile(fname.c_str(), std::ios::binary); + if (!bfile) return; + + std::vector<unsigned char> binary = prog.binary(); + + size_t binary_size = binary.size(); + bfile.write((char*)&binary_size, sizeof(size_t)); + bfile.write((char*)binary.data(), binary_size); + } + + // Tries to read program binaries from file cache. 
+ static boost::optional<program> load_program_binary( + const std::string &hash, const context &ctx + ) + { + std::string fname = detail::program_binary_path(hash) + "kernel"; + std::ifstream bfile(fname.c_str(), std::ios::binary); + if (!bfile) return boost::optional<program>(); + + size_t binary_size; + std::vector<unsigned char> binary; + + bfile.read((char*)&binary_size, sizeof(size_t)); + + binary.resize(binary_size); + bfile.read((char*)binary.data(), binary_size); + + return boost::optional<program>( + program::create_with_binary( + binary.data(), binary_size, ctx + ) + ); + } +#endif // BOOST_COMPUTE_USE_OFFLINE_CACHE + +private: + cl_program m_program; +}; + +/// \internal_ define get_info() specializations for program +BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(program, + ((cl_uint, CL_PROGRAM_REFERENCE_COUNT)) + ((cl_context, CL_PROGRAM_CONTEXT)) + ((cl_uint, CL_PROGRAM_NUM_DEVICES)) + ((std::vector<cl_device_id>, CL_PROGRAM_DEVICES)) + ((std::string, CL_PROGRAM_SOURCE)) + ((std::vector<size_t>, CL_PROGRAM_BINARY_SIZES)) + ((std::vector<unsigned char *>, CL_PROGRAM_BINARIES)) +) + +#ifdef CL_VERSION_1_2 +BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(program, + ((size_t, CL_PROGRAM_NUM_KERNELS)) + ((std::string, CL_PROGRAM_KERNEL_NAMES)) +) +#endif // CL_VERSION_1_2 + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_PROGRAM_HPP diff --git a/boost/compute/random.hpp b/boost/compute/random.hpp new file mode 100644 index 0000000000..1a361fe48b --- /dev/null +++ b/boost/compute/random.hpp @@ -0,0 +1,28 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_RANDOM_HPP +#define BOOST_COMPUTE_RANDOM_HPP + +/// \file +/// +/// Meta-header to include all Boost.Compute random headers. + +#include <boost/compute/random/bernoulli_distribution.hpp> +#include <boost/compute/random/default_random_engine.hpp> +#include <boost/compute/random/discrete_distribution.hpp> +#include <boost/compute/random/linear_congruential_engine.hpp> +#include <boost/compute/random/mersenne_twister_engine.hpp> +#include <boost/compute/random/threefry_engine.hpp> +#include <boost/compute/random/normal_distribution.hpp> +#include <boost/compute/random/uniform_int_distribution.hpp> +#include <boost/compute/random/uniform_real_distribution.hpp> + +#endif // BOOST_COMPUTE_RANDOM_HPP diff --git a/boost/compute/random/bernoulli_distribution.hpp b/boost/compute/random/bernoulli_distribution.hpp new file mode 100644 index 0000000000..edd1125090 --- /dev/null +++ b/boost/compute/random/bernoulli_distribution.hpp @@ -0,0 +1,92 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_RANDOM_BERNOULLI_DISTRIBUTION_HPP +#define BOOST_COMPUTE_RANDOM_BERNOULLI_DISTRIBUTION_HPP + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/function.hpp> +#include <boost/compute/types/fundamental.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/detail/literal.hpp> + +namespace boost { +namespace compute { + +/// +/// \class bernoulli_distribution +/// \brief Produces random boolean values according to the following +/// discrete probability function with parameter p : +/// P(true/p) = p and P(false/p) = (1 - p) +/// +/// The following example shows how to setup a bernoulli distribution to +/// produce random boolean values with parameter p = 0.25 +/// +/// \snippet test/test_bernoulli_distribution.cpp generate +/// +template<class RealType = float> +class bernoulli_distribution +{ +public: + + /// Creates a new bernoulli distribution + bernoulli_distribution(RealType p = 0.5f) + : m_p(p) + { + } + + /// Destroys the bernoulli_distribution object + ~bernoulli_distribution() + { + } + + /// Returns the value of the parameter p + RealType p() const + { + return m_p; + } + + /// Generates bernoulli distributed booleans and stores + /// them in the range [\p first, \p last). 
+ template<class OutputIterator, class Generator> + void generate(OutputIterator first, + OutputIterator last, + Generator &generator, + command_queue &queue) + { + size_t count = detail::iterator_range_size(first, last); + + vector<uint_> tmp(count, queue.get_context()); + generator.generate(tmp.begin(), tmp.end(), queue); + + BOOST_COMPUTE_FUNCTION(bool, scale_random, (const uint_ x), + { + return (convert_RealType(x) / MAX_RANDOM) < PARAM; + }); + + scale_random.define("PARAM", detail::make_literal(m_p)); + scale_random.define("MAX_RANDOM", "UINT_MAX"); + scale_random.define( + "convert_RealType", std::string("convert_") + type_name<RealType>() + ); + + transform( + tmp.begin(), tmp.end(), first, scale_random, queue + ); + } + +private: + RealType m_p; +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_RANDOM_BERNOULLI_DISTRIBUTION_HPP diff --git a/boost/compute/random/default_random_engine.hpp b/boost/compute/random/default_random_engine.hpp new file mode 100644 index 0000000000..b34dbd01fa --- /dev/null +++ b/boost/compute/random/default_random_engine.hpp @@ -0,0 +1,24 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_RANDOM_DEFAULT_RANDOM_ENGINE_HPP +#define BOOST_COMPUTE_RANDOM_DEFAULT_RANDOM_ENGINE_HPP + +#include <boost/compute/random/mersenne_twister_engine.hpp> + +namespace boost { +namespace compute { + +typedef mt19937 default_random_engine; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_RANDOM_DEFAULT_RANDOM_ENGINE_HPP diff --git a/boost/compute/random/discrete_distribution.hpp b/boost/compute/random/discrete_distribution.hpp new file mode 100644 index 0000000000..3707928f98 --- /dev/null +++ b/boost/compute/random/discrete_distribution.hpp @@ -0,0 +1,117 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_RANDOM_DISCRETE_DISTRIBUTION_HPP +#define BOOST_COMPUTE_RANDOM_DISCRETE_DISTRIBUTION_HPP + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/function.hpp> +#include <boost/compute/algorithm/accumulate.hpp> +#include <boost/compute/algorithm/copy.hpp> +#include <boost/compute/algorithm/transform.hpp> +#include <boost/compute/detail/literal.hpp> +#include <boost/compute/types/fundamental.hpp> + +namespace boost { +namespace compute { + +/// \class discrete_distribution +/// \brief Produces random integers on the interval [0, n), where +/// probability of each integer is given by the weight of the ith +/// integer divided by the sum of all weights. 
+/// +/// The following example shows how to setup a discrete distribution to +/// produce 0 and 1 with equal probability +/// +/// \snippet test/test_discrete_distribution.cpp generate +/// +template<class IntType = uint_> +class discrete_distribution +{ +public: + typedef IntType result_type; + + /// Creates a new discrete distribution with weights given by + /// the range [\p first, \p last) + template<class InputIterator> + discrete_distribution(InputIterator first, InputIterator last) + : m_n(std::distance(first, last)), + m_probabilities(std::distance(first, last)) + { + double sum = 0; + + for(InputIterator iter = first; iter!=last; iter++) + { + sum += *iter; + } + + for(size_t i=0; i<m_n; i++) + { + m_probabilities[i] = m_probabilities[i-1] + first[i]/sum; + } + } + + /// Destroys the discrete_distribution object. + ~discrete_distribution() + { + } + + /// Returns the value of n + result_type n() const + { + return m_n; + } + + /// Returns the probabilities + ::std::vector<double> probabilities() const + { + return m_probabilities; + } + + /// Generates uniformily distributed integers and stores + /// them to the range [\p first, \p last). 
+ template<class OutputIterator, class Generator> + void generate(OutputIterator first, + OutputIterator last, + Generator &generator, + command_queue &queue) + { + std::string source = "inline uint scale_random(uint x)\n"; + + source = source + + "{\n" + + "float rno = convert_float(x) / UINT_MAX;\n"; + for(size_t i=0; i<m_n; i++) + { + source = source + + "if(rno <= " + detail::make_literal<float>(m_probabilities[i]) + ")\n" + + " return " + detail::make_literal(i) + ";\n"; + } + + source = source + + "return " + detail::make_literal(m_n - 1) + ";\n" + + "}\n"; + + BOOST_COMPUTE_FUNCTION(IntType, scale_random, (const uint_ x), {}); + + scale_random.set_source(source); + + generator.generate(first, last, scale_random, queue); + } + +private: + size_t m_n; + ::std::vector<double> m_probabilities; +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_RANDOM_UNIFORM_INT_DISTRIBUTION_HPP diff --git a/boost/compute/random/linear_congruential_engine.hpp b/boost/compute/random/linear_congruential_engine.hpp new file mode 100644 index 0000000000..173a6c2881 --- /dev/null +++ b/boost/compute/random/linear_congruential_engine.hpp @@ -0,0 +1,238 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_RANDOM_LINEAR_CONGRUENTIAL_ENGINE_HPP +#define BOOST_COMPUTE_RANDOM_LINEAR_CONGRUENTIAL_ENGINE_HPP + +#include <algorithm> + +#include <boost/compute/types.hpp> +#include <boost/compute/buffer.hpp> +#include <boost/compute/kernel.hpp> +#include <boost/compute/context.hpp> +#include <boost/compute/program.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/transform.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/iterator/discard_iterator.hpp> +#include <boost/compute/utility/program_cache.hpp> + +namespace boost { +namespace compute { + +/// +/// \class linear_congruential_engine +/// \brief 'Quick and Dirty' linear congruential engine +/// +/// Quick and dirty linear congruential engine to generate low quality +/// random numbers very quickly. For uses in which good quality of random +/// numbers is required(Monte-Carlo Simulations), use other engines like +/// Mersenne Twister instead. +/// +template<class T = uint_> +class linear_congruential_engine +{ +public: + typedef T result_type; + static const T default_seed = 1; + static const T a = 1099087573; + static const size_t threads = 1024; + + /// Creates a new linear_congruential_engine and seeds it with \p value. + explicit linear_congruential_engine(command_queue &queue, + result_type value = default_seed) + : m_context(queue.get_context()), + m_multiplicands(m_context, threads * sizeof(result_type)) + { + // setup program + load_program(); + + // seed state + seed(value, queue); + + // generate multiplicands + generate_multiplicands(queue); + } + + /// Creates a new linear_congruential_engine object as a copy of \p other. 
+ linear_congruential_engine(const linear_congruential_engine<T> &other) + : m_context(other.m_context), + m_program(other.m_program), + m_seed(other.m_seed), + m_multiplicands(other.m_multiplicands) + { + } + + /// Copies \p other to \c *this. + linear_congruential_engine<T>& + operator=(const linear_congruential_engine<T> &other) + { + if(this != &other){ + m_context = other.m_context; + m_program = other.m_program; + m_seed = other.m_seed; + m_multiplicands = other.m_multiplicands; + } + + return *this; + } + + /// Destroys the linear_congruential_engine object. + ~linear_congruential_engine() + { + } + + /// Seeds the random number generator with \p value. + /// + /// \param value seed value for the random-number generator + /// \param queue command queue to perform the operation + /// + /// If no seed value is provided, \c default_seed is used. + void seed(result_type value, command_queue &queue) + { + (void) queue; + + m_seed = value; + } + + /// \overload + void seed(command_queue &queue) + { + seed(default_seed, queue); + } + + /// Generates random numbers and stores them to the range [\p first, \p last). 
+ template<class OutputIterator> + void generate(OutputIterator first, OutputIterator last, command_queue &queue) + { + size_t size = detail::iterator_range_size(first, last); + + kernel fill_kernel(m_program, "fill"); + fill_kernel.set_arg(1, m_multiplicands); + fill_kernel.set_arg(2, first.get_buffer()); + + size_t offset = 0; + + for(;;){ + size_t count = 0; + if(size > threads){ + count = (std::min)(static_cast<size_t>(threads), size - offset); + } + else { + count = size; + } + fill_kernel.set_arg(0, static_cast<const uint_>(m_seed)); + fill_kernel.set_arg(3, static_cast<const uint_>(offset)); + queue.enqueue_1d_range_kernel(fill_kernel, 0, count, 0); + + offset += count; + + if(offset >= size){ + break; + } + + update_seed(queue); + } + } + + /// \internal_ + void generate(discard_iterator first, discard_iterator last, command_queue &queue) + { + (void) queue; + + size_t size = detail::iterator_range_size(first, last); + uint_ max_mult = + detail::read_single_value<T>(m_multiplicands, threads-1, queue); + while(size >= threads) { + m_seed *= max_mult; + size -= threads; + } + m_seed *= + detail::read_single_value<T>(m_multiplicands, size-1, queue); + } + + /// Generates random numbers, transforms them with \p op, and then stores + /// them to the range [\p first, \p last). + template<class OutputIterator, class Function> + void generate(OutputIterator first, OutputIterator last, Function op, command_queue &queue) + { + vector<T> tmp(std::distance(first, last), queue.get_context()); + generate(tmp.begin(), tmp.end(), queue); + transform(tmp.begin(), tmp.end(), first, op, queue); + } + + /// Generates \p z random numbers and discards them. 
+ void discard(size_t z, command_queue &queue) + { + generate(discard_iterator(0), discard_iterator(z), queue); + } + +private: + /// \internal_ + /// Generates the multiplicands for each thread + void generate_multiplicands(command_queue &queue) + { + kernel multiplicand_kernel = + m_program.create_kernel("multiplicand"); + multiplicand_kernel.set_arg(0, m_multiplicands); + + queue.enqueue_task(multiplicand_kernel); + } + + /// \internal_ + void update_seed(command_queue &queue) + { + m_seed *= + detail::read_single_value<T>(m_multiplicands, threads-1, queue); + } + + /// \internal_ + void load_program() + { + boost::shared_ptr<program_cache> cache = + program_cache::get_global_cache(m_context); + + std::string cache_key = + std::string("__boost_linear_congruential_engine_") + type_name<T>(); + + const char source[] = + "__kernel void multiplicand(__global uint *multiplicands)\n" + "{\n" + " uint a = 1099087573;\n" + " multiplicands[0] = a;\n" + " for(uint i = 1; i < 1024; i++){\n" + " multiplicands[i] = a * multiplicands[i-1];\n" + " }\n" + "}\n" + + "__kernel void fill(const uint seed,\n" + " __global uint *multiplicands,\n" + " __global uint *result," + " const uint offset)\n" + "{\n" + " const uint i = get_global_id(0);\n" + " result[offset+i] = seed * multiplicands[i];\n" + "}\n"; + + m_program = cache->get_or_build(cache_key, std::string(), source, m_context); + } + +private: + context m_context; + program m_program; + T m_seed; + buffer m_multiplicands; +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_RANDOM_LINEAR_CONGRUENTIAL_ENGINE_HPP diff --git a/boost/compute/random/mersenne_twister_engine.hpp b/boost/compute/random/mersenne_twister_engine.hpp new file mode 100644 index 0000000000..db8560e53d --- /dev/null +++ b/boost/compute/random/mersenne_twister_engine.hpp @@ -0,0 +1,254 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> 
+// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_RANDOM_MERSENNE_TWISTER_ENGINE_HPP +#define BOOST_COMPUTE_RANDOM_MERSENNE_TWISTER_ENGINE_HPP + +#include <algorithm> + +#include <boost/compute/types.hpp> +#include <boost/compute/buffer.hpp> +#include <boost/compute/kernel.hpp> +#include <boost/compute/context.hpp> +#include <boost/compute/program.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/transform.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/iterator/discard_iterator.hpp> +#include <boost/compute/utility/program_cache.hpp> + +namespace boost { +namespace compute { + +/// \class mersenne_twister_engine +/// \brief Mersenne twister pseudorandom number generator. +template<class T> +class mersenne_twister_engine +{ +public: + typedef T result_type; + static const T default_seed = 5489U; + static const T n = 624; + static const T m = 397; + + /// Creates a new mersenne_twister_engine and seeds it with \p value. + explicit mersenne_twister_engine(command_queue &queue, + result_type value = default_seed) + : m_context(queue.get_context()), + m_state_buffer(m_context, n * sizeof(result_type)) + { + // setup program + load_program(); + + // seed state + seed(value, queue); + } + + /// Creates a new mersenne_twister_engine object as a copy of \p other. + mersenne_twister_engine(const mersenne_twister_engine<T> &other) + : m_context(other.m_context), + m_state_index(other.m_state_index), + m_program(other.m_program), + m_state_buffer(other.m_state_buffer) + { + } + + /// Copies \p other to \c *this. 
+ mersenne_twister_engine<T>& operator=(const mersenne_twister_engine<T> &other) + { + if(this != &other){ + m_context = other.m_context; + m_state_index = other.m_state_index; + m_program = other.m_program; + m_state_buffer = other.m_state_buffer; + } + + return *this; + } + + /// Destroys the mersenne_twister_engine object. + ~mersenne_twister_engine() + { + } + + /// Seeds the random number generator with \p value. + /// + /// \param value seed value for the random-number generator + /// \param queue command queue to perform the operation + /// + /// If no seed value is provided, \c default_seed is used. + void seed(result_type value, command_queue &queue) + { + kernel seed_kernel = m_program.create_kernel("seed"); + seed_kernel.set_arg(0, value); + seed_kernel.set_arg(1, m_state_buffer); + + queue.enqueue_task(seed_kernel); + + m_state_index = 0; + } + + /// \overload + void seed(command_queue &queue) + { + seed(default_seed, queue); + } + + /// Generates random numbers and stores them to the range [\p first, \p last). 
+ template<class OutputIterator> + void generate(OutputIterator first, OutputIterator last, command_queue &queue) + { + const size_t size = detail::iterator_range_size(first, last); + + kernel fill_kernel(m_program, "fill"); + fill_kernel.set_arg(0, m_state_buffer); + fill_kernel.set_arg(2, first.get_buffer()); + + size_t offset = 0; + size_t &p = m_state_index; + + for(;;){ + size_t count = 0; + if(size > n){ + count = (std::min)(static_cast<size_t>(n), size - offset); + } + else { + count = size; + } + fill_kernel.set_arg(1, static_cast<const uint_>(p)); + fill_kernel.set_arg(3, static_cast<const uint_>(offset)); + queue.enqueue_1d_range_kernel(fill_kernel, 0, count, 0); + + p += count; + offset += count; + + if(offset >= size){ + break; + } + + generate_state(queue); + p = 0; + } + } + + /// \internal_ + void generate(discard_iterator first, discard_iterator last, command_queue &queue) + { + (void) queue; + + m_state_index += std::distance(first, last); + } + + /// Generates random numbers, transforms them with \p op, and then stores + /// them to the range [\p first, \p last). + template<class OutputIterator, class Function> + void generate(OutputIterator first, OutputIterator last, Function op, command_queue &queue) + { + vector<T> tmp(std::distance(first, last), queue.get_context()); + generate(tmp.begin(), tmp.end(), queue); + transform(tmp.begin(), tmp.end(), first, op, queue); + } + + /// Generates \p z random numbers and discards them. 
+ void discard(size_t z, command_queue &queue) + { + generate(discard_iterator(0), discard_iterator(z), queue); + } + + /// \internal_ (deprecated) + template<class OutputIterator> + void fill(OutputIterator first, OutputIterator last, command_queue &queue) + { + generate(first, last, queue); + } + +private: + /// \internal_ + void generate_state(command_queue &queue) + { + kernel generate_state_kernel = + m_program.create_kernel("generate_state"); + generate_state_kernel.set_arg(0, m_state_buffer); + queue.enqueue_task(generate_state_kernel); + } + + /// \internal_ + void load_program() + { + boost::shared_ptr<program_cache> cache = + program_cache::get_global_cache(m_context); + + std::string cache_key = + std::string("__boost_mersenne_twister_engine_") + type_name<T>(); + + const char source[] = + "static uint twiddle(uint u, uint v)\n" + "{\n" + " return (((u & 0x80000000U) | (v & 0x7FFFFFFFU)) >> 1) ^\n" + " ((v & 1U) ? 0x9908B0DFU : 0x0U);\n" + "}\n" + + "__kernel void generate_state(__global uint *state)\n" + "{\n" + " const uint n = 624;\n" + " const uint m = 397;\n" + " for(uint i = 0; i < (n - m); i++)\n" + " state[i] = state[i+m] ^ twiddle(state[i], state[i+1]);\n" + " for(uint i = n - m; i < (n - 1); i++)\n" + " state[i] = state[i+m-n] ^ twiddle(state[i], state[i+1]);\n" + " state[n-1] = state[m-1] ^ twiddle(state[n-1], state[0]);\n" + "}\n" + + "__kernel void seed(const uint s, __global uint *state)\n" + "{\n" + " const uint n = 624;\n" + " state[0] = s & 0xFFFFFFFFU;\n" + " for(uint i = 1; i < n; i++){\n" + " state[i] = 1812433253U * (state[i-1] ^ (state[i-1] >> 30)) + i;\n" + " state[i] &= 0xFFFFFFFFU;\n" + " }\n" + " generate_state(state);\n" + "}\n" + + "static uint random_number(__global uint *state, const uint p)\n" + "{\n" + " uint x = state[p];\n" + " x ^= (x >> 11);\n" + " x ^= (x << 7) & 0x9D2C5680U;\n" + " x ^= (x << 15) & 0xEFC60000U;\n" + " return x ^ (x >> 18);\n" + "}\n" + + "__kernel void fill(__global uint *state,\n" + " const uint 
state_index,\n" + " __global uint *vector,\n" + " const uint offset)\n" + "{\n" + " const uint i = get_global_id(0);\n" + " vector[offset+i] = random_number(state, state_index + i);\n" + "}\n"; + + m_program = cache->get_or_build(cache_key, std::string(), source, m_context); + } + +private: + context m_context; + size_t m_state_index; + program m_program; + buffer m_state_buffer; +}; + +typedef mersenne_twister_engine<uint_> mt19937; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_RANDOM_MERSENNE_TWISTER_ENGINE_HPP diff --git a/boost/compute/random/normal_distribution.hpp b/boost/compute/random/normal_distribution.hpp new file mode 100644 index 0000000000..d025faeb2e --- /dev/null +++ b/boost/compute/random/normal_distribution.hpp @@ -0,0 +1,124 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_RANDOM_NORMAL_DISTRIBUTION_HPP +#define BOOST_COMPUTE_RANDOM_NORMAL_DISTRIBUTION_HPP + +#include <limits> + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/function.hpp> +#include <boost/compute/types/fundamental.hpp> +#include <boost/compute/type_traits/make_vector_type.hpp> + +namespace boost { +namespace compute { + +/// \class normal_distribution +/// \brief Produces random, normally-distributed floating-point numbers. 
+/// +/// The following example shows how to setup a normal distribution to +/// produce random \c float values centered at \c 5: +/// +/// \snippet test/test_normal_distribution.cpp generate +/// +/// \see default_random_engine, uniform_real_distribution +template<class RealType = float> +class normal_distribution +{ +public: + typedef RealType result_type; + + /// Creates a new normal distribution producing numbers with the given + /// \p mean and \p stddev. + normal_distribution(RealType mean = 0.f, RealType stddev = 1.f) + : m_mean(mean), + m_stddev(stddev) + { + } + + /// Destroys the normal distribution object. + ~normal_distribution() + { + } + + /// Returns the mean value of the distribution. + result_type mean() const + { + return m_mean; + } + + /// Returns the standard-deviation of the distribution. + result_type stddev() const + { + return m_stddev; + } + + /// Returns the minimum value of the distribution. + result_type min BOOST_PREVENT_MACRO_SUBSTITUTION () const + { + return -std::numeric_limits<RealType>::infinity(); + } + + /// Returns the maximum value of the distribution. + result_type max BOOST_PREVENT_MACRO_SUBSTITUTION () const + { + return std::numeric_limits<RealType>::infinity(); + } + + /// Generates normally-distributed floating-point numbers and stores + /// them to the range [\p first, \p last). 
+ template<class OutputIterator, class Generator> + void generate(OutputIterator first, + OutputIterator last, + Generator &generator, + command_queue &queue) + { + typedef typename make_vector_type<RealType, 2>::type RealType2; + + size_t count = detail::iterator_range_size(first, last); + + vector<uint_> tmp(count, queue.get_context()); + generator.generate(tmp.begin(), tmp.end(), queue); + + BOOST_COMPUTE_FUNCTION(RealType2, box_muller, (const uint2_ x), + { + const RealType x1 = x.x / (RealType) (UINT_MAX - 1); + const RealType x2 = x.y / (RealType) (UINT_MAX - 1); + + const RealType z1 = sqrt(-2.f * log2(x1)) * cos(2.f * M_PI_F * x2); + const RealType z2 = sqrt(-2.f * log2(x1)) * sin(2.f * M_PI_F * x2); + + return (RealType2)(MEAN, MEAN) + (RealType2)(z1, z2) * (RealType2)(STDDEV, STDDEV); + }); + + box_muller.define("MEAN", boost::lexical_cast<std::string>(m_mean)); + box_muller.define("STDDEV", boost::lexical_cast<std::string>(m_stddev)); + box_muller.define("RealType", type_name<RealType>()); + box_muller.define("RealType2", type_name<RealType2>()); + + transform( + make_buffer_iterator<uint2_>(tmp.get_buffer(), 0), + make_buffer_iterator<uint2_>(tmp.get_buffer(), count / 2), + make_buffer_iterator<RealType2>(first.get_buffer(), 0), + box_muller, + queue + ); + } + +private: + RealType m_mean; + RealType m_stddev; +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_RANDOM_NORMAL_DISTRIBUTION_HPP diff --git a/boost/compute/random/threefry_engine.hpp b/boost/compute/random/threefry_engine.hpp new file mode 100644 index 0000000000..917bb72c06 --- /dev/null +++ b/boost/compute/random/threefry_engine.hpp @@ -0,0 +1,311 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2015 Muhammad Junaid Muzammil <mjunaidmuzammil@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// 
http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_RANDOM_THREEFRY_HPP +#define BOOST_COMPUTE_RANDOM_THREEFRY_HPP + +#include <algorithm> + +#include <boost/compute/types.hpp> +#include <boost/compute/buffer.hpp> +#include <boost/compute/kernel.hpp> +#include <boost/compute/context.hpp> +#include <boost/compute/program.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/algorithm/transform.hpp> +#include <boost/compute/detail/iterator_range_size.hpp> +#include <boost/compute/utility/program_cache.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/iterator/discard_iterator.hpp> + +namespace boost { +namespace compute { + +/// \class threefry_engine +/// \brief Threefry pseudorandom number generator. +template<class T = uint_> +class threefry_engine +{ +public: + static const size_t threads = 1024; + typedef T result_type; + + /// Creates a new threefry_engine and seeds it with \p value. + explicit threefry_engine(command_queue &queue) + : m_context(queue.get_context()) + { + // setup program + load_program(); + } + + /// Creates a new threefry_engine object as a copy of \p other. + threefry_engine(const threefry_engine<T> &other) + : m_context(other.m_context), + m_program(other.m_program) + { + } + + /// Copies \p other to \c *this. + threefry_engine<T>& operator=(const threefry_engine<T> &other) + { + if(this != &other){ + m_context = other.m_context; + m_program = other.m_program; + } + + return *this; + } + + /// Destroys the threefry_engine object. + ~threefry_engine() + { + } + +private: + /// \internal_ + void load_program() + { + boost::shared_ptr<program_cache> cache = + program_cache::get_global_cache(m_context); + std::string cache_key = + std::string("threefry_engine_32x2"); + + // Copyright 2010-2012, D. E. Shaw Research. + // All rights reserved. 
+ + // Redistribution and use in source and binary forms, with or without + // modification, are permitted provided that the following conditions are + // met: + + // * Redistributions of source code must retain the above copyright + // notice, this list of conditions, and the following disclaimer. + + // * Redistributions in binary form must reproduce the above copyright + // notice, this list of conditions, and the following disclaimer in the + // documentation and/or other materials provided with the distribution. + + // * Neither the name of D. E. Shaw Research nor the names of its + // contributors may be used to endorse or promote products derived from + // this software without specific prior written permission. + + // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ const char source[] = + "#define THREEFRY2x32_DEFAULT_ROUNDS 20\n" + "#define SKEIN_KS_PARITY_32 0x1BD11BDA\n" + + "enum r123_enum_threefry32x2 {\n" + " R_32x2_0_0=13,\n" + " R_32x2_1_0=15,\n" + " R_32x2_2_0=26,\n" + " R_32x2_3_0= 6,\n" + " R_32x2_4_0=17,\n" + " R_32x2_5_0=29,\n" + " R_32x2_6_0=16,\n" + " R_32x2_7_0=24\n" + "};\n" + + "static uint RotL_32(uint x, uint N)\n" + "{\n" + " return (x << (N & 31)) | (x >> ((32-N) & 31));\n" + "}\n" + + "struct r123array2x32 {\n" + " uint v[2];\n" + "};\n" + "typedef struct r123array2x32 threefry2x32_ctr_t;\n" + "typedef struct r123array2x32 threefry2x32_key_t;\n" + + "threefry2x32_ctr_t threefry2x32_R(unsigned int Nrounds, threefry2x32_ctr_t in, threefry2x32_key_t k)\n" + "{\n" + " threefry2x32_ctr_t X;\n" + " uint ks[3];\n" + " uint i; \n" + " ks[2] = SKEIN_KS_PARITY_32;\n" + " for (i=0;i < 2; i++) {\n" + " ks[i] = k.v[i];\n" + " X.v[i] = in.v[i];\n" + " ks[2] ^= k.v[i];\n" + " }\n" + " X.v[0] += ks[0]; X.v[1] += ks[1];\n" + " if(Nrounds>0){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>1){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>2){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>3){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>3){\n" + " X.v[0] += ks[1]; X.v[1] += ks[2];\n" + " X.v[1] += 1;\n" + " }\n" + " if(Nrounds>4){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>5){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>6){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>7){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>7){\n" + " X.v[0] += ks[2]; X.v[1] += ks[0];\n" + " X.v[1] += 2;\n" + " }\n" + " if(Nrounds>8){ X.v[0] += X.v[1]; X.v[1] = 
RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>9){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>10){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>11){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>11){\n" + " X.v[0] += ks[0]; X.v[1] += ks[1];\n" + " X.v[1] += 3;\n" + " }\n" + " if(Nrounds>12){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>13){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>14){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>15){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>15){\n" + " X.v[0] += ks[1]; X.v[1] += ks[2];\n" + " X.v[1] += 4;\n" + " }\n" + " if(Nrounds>16){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>17){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>18){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>19){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>19){\n" + " X.v[0] += ks[2]; X.v[1] += ks[0];\n" + " X.v[1] += 5;\n" + " }\n" + " if(Nrounds>20){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>21){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>22){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>23){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>23){\n" + " X.v[0] += ks[0]; X.v[1] += ks[1];\n" + " X.v[1] += 6;\n" + " }\n" + " if(Nrounds>24){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_0_0); X.v[1] ^= X.v[0]; }\n" + " 
if(Nrounds>25){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_1_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>26){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_2_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>27){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_3_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>27){\n" + " X.v[0] += ks[1]; X.v[1] += ks[2];\n" + " X.v[1] += 7;\n" + " }\n" + " if(Nrounds>28){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_4_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>29){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_5_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>30){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_6_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>31){ X.v[0] += X.v[1]; X.v[1] = RotL_32(X.v[1],R_32x2_7_0); X.v[1] ^= X.v[0]; }\n" + " if(Nrounds>31){\n" + " X.v[0] += ks[2]; X.v[1] += ks[0];\n" + " X.v[1] += 8;\n" + " }\n" + " return X;\n" + "}\n" + + "__kernel void generate_rng(__global uint *ctr, __global uint *key, const uint offset) {\n" + " threefry2x32_ctr_t in;\n" + " threefry2x32_key_t k;\n" + " const uint i = get_global_id(0);\n" + " in.v[0] = ctr[2 * (offset + i)];\n" + " in.v[1] = ctr[2 * (offset + i) + 1];\n" + " k.v[0] = key[2 * (offset + i)];\n" + " k.v[1] = key[2 * (offset + i) + 1];\n" + " in = threefry2x32_R(20, in, k);\n" + " ctr[2 * (offset + i)] = in.v[0];\n" + " ctr[2 * (offset + i) + 1] = in.v[1];\n" + "}\n"; + + m_program = cache->get_or_build(cache_key, std::string(), source, m_context); + } + +public: + + + /// Generates Threefry random numbers using both the counter and key values, and then stores + /// them to the range [\p first_ctr, \p last_ctr). 
+ template<class OutputIterator> + void generate(OutputIterator first_ctr, OutputIterator last_ctr, OutputIterator first_key, OutputIterator last_key, command_queue &queue) { + const size_t size_ctr = detail::iterator_range_size(first_ctr, last_ctr); + const size_t size_key = detail::iterator_range_size(first_key, last_key); + if(!size_ctr || !size_key || (size_ctr != size_key)) { + return; + } + kernel rng_kernel = m_program.create_kernel("generate_rng"); + + rng_kernel.set_arg(0, first_ctr.get_buffer()); + rng_kernel.set_arg(1, first_key.get_buffer()); + size_t offset = 0; + + for(;;){ + size_t count = 0; + size_t size = size_ctr/2; + if(size > threads){ + count = (std::min)(static_cast<size_t>(threads), size - offset); + } + else { + count = size; + } + rng_kernel.set_arg(2, static_cast<const uint_>(offset)); + queue.enqueue_1d_range_kernel(rng_kernel, 0, count, 0); + + offset += count; + + if(offset >= size){ + break; + } + + } + } + + template<class OutputIterator> + void generate(OutputIterator first_ctr, OutputIterator last_ctr, command_queue &queue) { + const size_t size_ctr = detail::iterator_range_size(first_ctr, last_ctr); + if(!size_ctr) { + return; + } + boost::compute::vector<uint_> vector_key(size_ctr, m_context); + vector_key.assign(size_ctr, 0, queue); + kernel rng_kernel = m_program.create_kernel("generate_rng"); + + rng_kernel.set_arg(0, first_ctr.get_buffer()); + rng_kernel.set_arg(1, vector_key); + size_t offset = 0; + + for(;;){ + size_t count = 0; + size_t size = size_ctr/2; + if(size > threads){ + count = (std::min)(static_cast<size_t>(threads), size - offset); + } + else { + count = size; + } + rng_kernel.set_arg(2, static_cast<const uint_>(offset)); + queue.enqueue_1d_range_kernel(rng_kernel, 0, count, 0); + + offset += count; + + if(offset >= size){ + break; + } + + } + } +private: + context m_context; + program m_program; +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_RANDOM_THREEFRY_HPP diff --git 
a/boost/compute/random/uniform_int_distribution.hpp b/boost/compute/random/uniform_int_distribution.hpp new file mode 100644 index 0000000000..92e8b3305f --- /dev/null +++ b/boost/compute/random/uniform_int_distribution.hpp @@ -0,0 +1,111 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_RANDOM_UNIFORM_INT_DISTRIBUTION_HPP +#define BOOST_COMPUTE_RANDOM_UNIFORM_INT_DISTRIBUTION_HPP + +#include <limits> + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/container/vector.hpp> +#include <boost/compute/function.hpp> +#include <boost/compute/types/fundamental.hpp> +#include <boost/compute/algorithm/copy_if.hpp> +#include <boost/compute/algorithm/transform.hpp> + +namespace boost { +namespace compute { + +/// \class uniform_int_distribution +/// \brief Produces uniformily distributed random integers +/// +/// The following example shows how to setup a uniform int distribution to +/// produce random integers 0 and 1. +/// +/// \snippet test/test_uniform_int_distribution.cpp generate +/// +template<class IntType = uint_> +class uniform_int_distribution +{ +public: + typedef IntType result_type; + + /// Creates a new uniform distribution producing numbers in the range + /// [\p a, \p b]. + explicit uniform_int_distribution(IntType a = 0, + IntType b = (std::numeric_limits<IntType>::max)()) + : m_a(a), + m_b(b) + { + } + + /// Destroys the uniform_int_distribution object. + ~uniform_int_distribution() + { + } + + /// Returns the minimum value of the distribution. 
+ result_type a() const + { + return m_a; + } + + /// Returns the maximum value of the distribution. + result_type b() const + { + return m_b; + } + + /// Generates uniformily distributed integers and stores + /// them to the range [\p first, \p last). + template<class OutputIterator, class Generator> + void generate(OutputIterator first, + OutputIterator last, + Generator &generator, + command_queue &queue) + { + size_t size = std::distance(first, last); + typedef typename Generator::result_type g_result_type; + + vector<g_result_type> tmp(size, queue.get_context()); + vector<g_result_type> tmp2(size, queue.get_context()); + + uint_ bound = ((uint_(-1))/(m_b-m_a+1))*(m_b-m_a+1); + + buffer_iterator<g_result_type> tmp2_iter; + + while(size>0) + { + generator.generate(tmp.begin(), tmp.begin() + size, queue); + tmp2_iter = copy_if(tmp.begin(), tmp.begin() + size, tmp2.begin(), + _1 <= bound, queue); + size = std::distance(tmp2_iter, tmp2.end()); + } + + BOOST_COMPUTE_FUNCTION(IntType, scale_random, (const g_result_type x), + { + return LO + (x % (HI-LO+1)); + }); + + scale_random.define("LO", boost::lexical_cast<std::string>(m_a)); + scale_random.define("HI", boost::lexical_cast<std::string>(m_b)); + + transform(tmp2.begin(), tmp2.end(), first, scale_random, queue); + } + +private: + IntType m_a; + IntType m_b; +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_RANDOM_UNIFORM_INT_DISTRIBUTION_HPP diff --git a/boost/compute/random/uniform_real_distribution.hpp b/boost/compute/random/uniform_real_distribution.hpp new file mode 100644 index 0000000000..231b0dba01 --- /dev/null +++ b/boost/compute/random/uniform_real_distribution.hpp @@ -0,0 +1,105 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// 
http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_RANDOM_UNIFORM_REAL_DISTRIBUTION_HPP +#define BOOST_COMPUTE_RANDOM_UNIFORM_REAL_DISTRIBUTION_HPP + +#include <boost/compute/command_queue.hpp> +#include <boost/compute/function.hpp> +#include <boost/compute/detail/literal.hpp> +#include <boost/compute/types/fundamental.hpp> + +namespace boost { +namespace compute { + +/// \class uniform_real_distribution +/// \brief Produces uniformily distributed random floating-point numbers. +/// +/// The following example shows how to setup a uniform real distribution to +/// produce random \c float values between \c 1 and \c 100. +/// +/// \snippet test/test_uniform_real_distribution.cpp generate +/// +/// \see default_random_engine, normal_distribution +template<class RealType = float> +class uniform_real_distribution +{ +public: + typedef RealType result_type; + + /// Creates a new uniform distribution producing numbers in the range + /// [\p a, \p b). + uniform_real_distribution(RealType a = 0.f, RealType b = 1.f) + : m_a(a), + m_b(b) + { + } + + /// Destroys the uniform_real_distribution object. + ~uniform_real_distribution() + { + } + + /// Returns the minimum value of the distribution. + result_type a() const + { + return m_a; + } + + /// Returns the maximum value of the distribution. + result_type b() const + { + return m_b; + } + + /// Generates uniformily distributed floating-point numbers and stores + /// them to the range [\p first, \p last). 
+ template<class OutputIterator, class Generator> + void generate(OutputIterator first, + OutputIterator last, + Generator &generator, + command_queue &queue) + { + BOOST_COMPUTE_FUNCTION(RealType, scale_random, (const uint_ x), + { + return LO + (convert_RealType(x) / MAX_RANDOM) * (HI - LO); + }); + + scale_random.define("LO", detail::make_literal(m_a)); + scale_random.define("HI", detail::make_literal(m_b)); + scale_random.define("MAX_RANDOM", "UINT_MAX"); + scale_random.define( + "convert_RealType", std::string("convert_") + type_name<RealType>() + ); + + generator.generate( + first, last, scale_random, queue + ); + } + + /// \internal_ (deprecated) + template<class OutputIterator, class Generator> + void fill(OutputIterator first, + OutputIterator last, + Generator &g, + command_queue &queue) + { + generate(first, last, g, queue); + } + +private: + RealType m_a; + RealType m_b; +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_RANDOM_UNIFORM_REAL_DISTRIBUTION_HPP diff --git a/boost/compute/source.hpp b/boost/compute/source.hpp new file mode 100644 index 0000000000..844dfa49ea --- /dev/null +++ b/boost/compute/source.hpp @@ -0,0 +1,12 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +// deprecated, use <boost/compute/utility/source.hpp> instead +#include <boost/compute/utility/source.hpp> diff --git a/boost/compute/svm.hpp b/boost/compute/svm.hpp new file mode 100644 index 0000000000..d03c8d9079 --- /dev/null +++ b/boost/compute/svm.hpp @@ -0,0 +1,62 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_SVM_HPP +#define BOOST_COMPUTE_SVM_HPP + +#include <boost/compute/config.hpp> +#include <boost/compute/context.hpp> +#include <boost/compute/memory/svm_ptr.hpp> + +// svm functions require opencl 2.0 +#if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + +namespace boost { +namespace compute { + +/// Allocates a shared virtual memory (SVM) buffer. +// +/// \opencl_version_warning{2,0} +/// +/// \see_opencl2_ref{clSVMAlloc} +/// +/// \see svm_free() +template<class T> +inline svm_ptr<T> svm_alloc(const context &context, + size_t size, + cl_svm_mem_flags flags = CL_MEM_READ_WRITE, + unsigned int alignment = 0) +{ + svm_ptr<T> ptr(clSVMAlloc(context.get(), flags, size * sizeof(T), alignment)); + if(!ptr.get()){ + BOOST_THROW_EXCEPTION(opencl_error(CL_MEM_OBJECT_ALLOCATION_FAILURE)); + } + return ptr; +} + +/// Deallocates a shared virtual memory (SVM) buffer. 
+/// +/// \opencl_version_warning{2,0} +/// +/// \see_opencl2_ref{clSVMFree} +/// +/// \see svm_alloc(), command_queue::enqueue_svm_free() +template<class T> +inline void svm_free(const context &context, svm_ptr<T> ptr) +{ + clSVMFree(context.get(), ptr.get()); +} + +} // end compute namespace +} // end boost namespace + +#endif // CL_VERSION_2_0 + +#endif // BOOST_COMPUTE_PIPE_HPP diff --git a/boost/compute/system.hpp b/boost/compute/system.hpp new file mode 100644 index 0000000000..f205fece7d --- /dev/null +++ b/boost/compute/system.hpp @@ -0,0 +1,278 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_SYSTEM_HPP +#define BOOST_COMPUTE_SYSTEM_HPP + +#include <string> +#include <vector> +#include <cstdlib> + +#include <boost/throw_exception.hpp> + +#include <boost/compute/cl.hpp> +#include <boost/compute/device.hpp> +#include <boost/compute/context.hpp> +#include <boost/compute/platform.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/detail/getenv.hpp> +#include <boost/compute/exception/no_device_found.hpp> + +namespace boost { +namespace compute { + +/// \class system +/// \brief Provides access to platforms and devices on the system. +/// +/// The system class contains a set of static functions which provide access to +/// the OpenCL platforms and compute devices on the host system. +/// +/// The default_device() convenience method automatically selects and returns +/// the "best" compute device for the system following a set of heuristics and +/// environment variables. This simplifies setup of the OpenCL enviornment. 
+/// +/// \see platform, device, context +class system +{ +public: + /// Returns the default compute device for the system. + /// + /// The default device is selected based on a set of heuristics and can be + /// influenced using one of the following environment variables: + /// + /// \li \c BOOST_COMPUTE_DEFAULT_DEVICE - + /// name of the compute device (e.g. "GTX TITAN") + /// \li \c BOOST_COMPUTE_DEFAULT_DEVICE_TYPE + /// type of the compute device (e.g. "GPU" or "CPU") + /// \li \c BOOST_COMPUTE_DEFAULT_PLATFORM - + /// name of the platform (e.g. "NVIDIA CUDA") + /// \li \c BOOST_COMPUTE_DEFAULT_VENDOR - + /// name of the device vendor (e.g. "NVIDIA") + /// + /// The default device is determined once on the first time this function + /// is called. Calling this function multiple times will always result in + /// the same device being returned. + /// + /// If no OpenCL device is found on the system, a no_device_found exception + /// is thrown. + /// + /// For example, to print the name of the default compute device on the + /// system: + /// \code + /// // get the default compute device + /// boost::compute::device device = boost::compute::system::default_device(); + /// + /// // print the name of the device + /// std::cout << "default device: " << device.name() << std::endl; + /// \endcode + static device default_device() + { + static device default_device = find_default_device(); + + return default_device; + } + + /// Returns the device with \p name. + /// + /// \throws no_device_found if no device with \p name is found. + static device find_device(const std::string &name) + { + const std::vector<device> devices = system::devices(); + for(size_t i = 0; i < devices.size(); i++){ + const device& device = devices[i]; + + if(device.name() == name){ + return device; + } + } + + BOOST_THROW_EXCEPTION(no_device_found()); + } + + /// Returns a vector containing all of the compute devices on + /// the system. 
+ /// + /// For example, to print out the name of each OpenCL-capable device + /// available on the system: + /// \code + /// for(const auto &device : boost::compute::system::devices()){ + /// std::cout << device.name() << std::endl; + /// } + /// \endcode + static std::vector<device> devices() + { + std::vector<device> devices; + + const std::vector<platform> platforms = system::platforms(); + for(size_t i = 0; i < platforms.size(); i++){ + const std::vector<device> platform_devices = platforms[i].devices(); + + devices.insert( + devices.end(), platform_devices.begin(), platform_devices.end() + ); + } + + return devices; + } + + /// Returns the number of compute devices on the system. + static size_t device_count() + { + size_t count = 0; + + const std::vector<platform> platforms = system::platforms(); + for(size_t i = 0; i < platforms.size(); i++){ + count += platforms[i].device_count(); + } + + return count; + } + + /// Returns the default context for the system. + /// + /// The default context is created for the default device on the system + /// (as returned by default_device()). + /// + /// The default context is created once on the first time this function is + /// called. Calling this function multiple times will always result in the + /// same context object being returned. + static context default_context() + { + static context default_context(default_device()); + + return default_context; + } + + /// Returns the default command queue for the system. + static command_queue& default_queue() + { + static command_queue queue(default_context(), default_device()); + + return queue; + } + + /// Blocks until all outstanding computations on the default + /// command queue are complete. + /// + /// This is equivalent to: + /// \code + /// system::default_queue().finish(); + /// \endcode + static void finish() + { + default_queue().finish(); + } + + /// Returns a vector containing each of the OpenCL platforms on the system. 
+ /// + /// For example, to print out the name of each OpenCL platform present on + /// the system: + /// \code + /// for(const auto &platform : boost::compute::system::platforms()){ + /// std::cout << platform.name() << std::endl; + /// } + /// \endcode + static std::vector<platform> platforms() + { + cl_uint count = 0; + clGetPlatformIDs(0, 0, &count); + + std::vector<cl_platform_id> platform_ids(count); + clGetPlatformIDs(count, &platform_ids[0], 0); + + std::vector<platform> platforms; + for(size_t i = 0; i < platform_ids.size(); i++){ + platforms.push_back(platform(platform_ids[i])); + } + + return platforms; + } + + /// Returns the number of compute platforms on the system. + static size_t platform_count() + { + cl_uint count = 0; + clGetPlatformIDs(0, 0, &count); + return static_cast<size_t>(count); + } + +private: + /// \internal_ + static device find_default_device() + { + // get a list of all devices on the system + const std::vector<device> devices_ = devices(); + if(devices_.empty()){ + BOOST_THROW_EXCEPTION(no_device_found()); + } + + // check for device from environment variable + const char *name = detail::getenv("BOOST_COMPUTE_DEFAULT_DEVICE"); + const char *type = detail::getenv("BOOST_COMPUTE_DEFAULT_DEVICE_TYPE"); + const char *platform = detail::getenv("BOOST_COMPUTE_DEFAULT_PLATFORM"); + const char *vendor = detail::getenv("BOOST_COMPUTE_DEFAULT_VENDOR"); + + if(name || type || platform || vendor){ + for(size_t i = 0; i < devices_.size(); i++){ + const device& device = devices_[i]; + if (name && !matches(device.name(), name)) + continue; + + if (type && matches(std::string("GPU"), type)) + if (!(device.type() & device::gpu)) + continue; + + if (type && matches(std::string("CPU"), type)) + if (!(device.type() & device::cpu)) + continue; + + if (platform && !matches(device.platform().name(), platform)) + continue; + + if (vendor && !matches(device.vendor(), vendor)) + continue; + + return device; + } + } + + // find the first gpu device + 
for(size_t i = 0; i < devices_.size(); i++){ + const device& device = devices_[i]; + + if(device.type() & device::gpu){ + return device; + } + } + + // find the first cpu device + for(size_t i = 0; i < devices_.size(); i++){ + const device& device = devices_[i]; + + if(device.type() & device::cpu){ + return device; + } + } + + // return the first device found + return devices_[0]; + } + + /// \internal_ + static bool matches(const std::string &str, const std::string &pattern) + { + return str.find(pattern) != std::string::npos; + } +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_SYSTEM_HPP diff --git a/boost/compute/type_traits.hpp b/boost/compute/type_traits.hpp new file mode 100644 index 0000000000..9ba98d9c2c --- /dev/null +++ b/boost/compute/type_traits.hpp @@ -0,0 +1,25 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_TYPE_TRAITS_HPP +#define BOOST_COMPUTE_TYPE_TRAITS_HPP + +#include <boost/compute/type_traits/common_type.hpp> +#include <boost/compute/type_traits/is_device_iterator.hpp> +#include <boost/compute/type_traits/is_fundamental.hpp> +#include <boost/compute/type_traits/is_vector_type.hpp> +#include <boost/compute/type_traits/make_vector_type.hpp> +#include <boost/compute/type_traits/result_of.hpp> +#include <boost/compute/type_traits/scalar_type.hpp> +#include <boost/compute/type_traits/type_definition.hpp> +#include <boost/compute/type_traits/type_name.hpp> +#include <boost/compute/type_traits/vector_size.hpp> + +#endif // BOOST_COMPUTE_TYPE_TRAITS_HPP diff --git a/boost/compute/type_traits/common_type.hpp b/boost/compute/type_traits/common_type.hpp new file mode 100644 index 0000000000..49a81ff3fd --- /dev/null +++ b/boost/compute/type_traits/common_type.hpp @@ -0,0 +1,55 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_TYPE_TRAITS_COMMON_TYPE_HPP +#define BOOST_COMPUTE_TYPE_TRAITS_COMMON_TYPE_HPP + +#include <boost/type_traits/common_type.hpp> + +#include <boost/compute/types/fundamental.hpp> + +namespace boost { + +/// \internal_ +#define BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPE(scalar, size) \ + template<> \ + struct common_type<BOOST_COMPUTE_MAKE_VECTOR_TYPE(scalar, size), \ + BOOST_COMPUTE_MAKE_SCALAR_TYPE(scalar)> \ + { \ + typedef BOOST_COMPUTE_MAKE_VECTOR_TYPE(scalar, size) type; \ + }; \ + template<> \ + struct common_type<BOOST_COMPUTE_MAKE_SCALAR_TYPE(scalar), \ + BOOST_COMPUTE_MAKE_VECTOR_TYPE(scalar, size)> \ + { \ + typedef BOOST_COMPUTE_MAKE_VECTOR_TYPE(scalar, size) type; \ + }; + +/// \internal_ +#define BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(scalar) \ + BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPE(scalar, 2) \ + BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPE(scalar, 4) \ + BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPE(scalar, 8) \ + BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPE(scalar, 16) \ + +BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(char) +BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(uchar) +BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(short) +BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(ushort) +BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(int) +BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(uint) +BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(long) +BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(ulong) +BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(float) +BOOST_COMPUTE_DECLARE_SCALAR_VECTOR_COMMON_TYPES(double) + +} // end boost namespace + +#endif // BOOST_COMPUTE_TYPE_TRAITS_COMMON_TYPE_HPP diff --git a/boost/compute/type_traits/detail/capture_traits.hpp b/boost/compute/type_traits/detail/capture_traits.hpp new file mode 100644 index 0000000000..e790f80696 --- /dev/null +++ 
b/boost/compute/type_traits/detail/capture_traits.hpp @@ -0,0 +1,33 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_TYPE_TRAITS_DETAIL_CAPTURE_TRAITS_HPP +#define BOOST_COMPUTE_TYPE_TRAITS_DETAIL_CAPTURE_TRAITS_HPP + +#include <boost/compute/type_traits/type_name.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class T> +struct capture_traits +{ + static std::string type_name() + { + return ::boost::compute::type_name<T>(); + } +}; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_TYPE_TRAITS_DETAIL_CAPTURE_TRAITS_HPP diff --git a/boost/compute/type_traits/is_device_iterator.hpp b/boost/compute/type_traits/is_device_iterator.hpp new file mode 100644 index 0000000000..5e7021f2e8 --- /dev/null +++ b/boost/compute/type_traits/is_device_iterator.hpp @@ -0,0 +1,39 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_TYPE_TRAITS_IS_DEVICE_ITERATOR_HPP +#define BOOST_COMPUTE_TYPE_TRAITS_IS_DEVICE_ITERATOR_HPP + +#include <boost/type_traits/integral_constant.hpp> + +namespace boost { +namespace compute { + +/// Meta-function returning \c true if \c Iterator is a device-iterator. +/// +/// By default, this function returns false. Device iterator types (such as +/// buffer_iterator) should specialize this trait and return \c true. +/// +/// For example: +/// \code +/// is_device_iterator<buffer_iterator<int>>::value == true +/// is_device_iterator<std::vector<int>::iterator>::value == false +/// \endcode +template<class Iterator> +struct is_device_iterator : boost::false_type {}; + +/// \internal_ +template<class Iterator> +struct is_device_iterator<const Iterator> : is_device_iterator<Iterator> {}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_TYPE_TRAITS_IS_DEVICE_ITERATOR_HPP diff --git a/boost/compute/type_traits/is_fundamental.hpp b/boost/compute/type_traits/is_fundamental.hpp new file mode 100644 index 0000000000..6386f1184d --- /dev/null +++ b/boost/compute/type_traits/is_fundamental.hpp @@ -0,0 +1,58 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_TYPE_TRAITS_IS_FUNDAMENTAL_HPP +#define BOOST_COMPUTE_TYPE_TRAITS_IS_FUNDAMENTAL_HPP + +#include <boost/compute/types/fundamental.hpp> + +namespace boost { +namespace compute { + +/// Meta-function returning \c true if \p T is a fundamental (i.e. 
+/// built-in) type. +/// +/// For example, +/// \code +/// is_fundamental<float>::value == true +/// is_fundamental<std::pair<int, float>>::value == false +/// \endcode +template<class T> +struct is_fundamental : public boost::false_type {}; + +/// \internal_ +#define BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(type) \ + template<> struct is_fundamental<BOOST_PP_CAT(type, _)> : boost::true_type {}; \ + template<> struct is_fundamental<BOOST_PP_CAT(BOOST_PP_CAT(type, 2), _)> : boost::true_type {}; \ + template<> struct is_fundamental<BOOST_PP_CAT(BOOST_PP_CAT(type, 4), _)> : boost::true_type {}; \ + template<> struct is_fundamental<BOOST_PP_CAT(BOOST_PP_CAT(type, 8), _)> : boost::true_type {}; \ + template<> struct is_fundamental<BOOST_PP_CAT(BOOST_PP_CAT(type, 16), _)> : boost::true_type {}; \ + template<> struct is_fundamental<BOOST_PP_CAT(cl_, BOOST_PP_CAT(type, 2))> : boost::true_type {}; \ + template<> struct is_fundamental<BOOST_PP_CAT(cl_, BOOST_PP_CAT(type, 4))> : boost::true_type {}; \ + template<> struct is_fundamental<BOOST_PP_CAT(cl_, BOOST_PP_CAT(type, 8))> : boost::true_type {}; \ + template<> struct is_fundamental<BOOST_PP_CAT(cl_, BOOST_PP_CAT(type, 16))> : boost::true_type {}; + +BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(char) +BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(uchar) +BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(short) +BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(ushort) +BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(int) +BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(uint) +BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(long) +BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(ulong) +BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(float) +BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL(double) + +#undef BOOST_COMPUTE_DETAIL_DECLARE_FUNDAMENTAL + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_TYPE_TRAITS_IS_FUNDAMENTAL_HPP diff --git a/boost/compute/type_traits/is_vector_type.hpp b/boost/compute/type_traits/is_vector_type.hpp new file mode 100644 index 
0000000000..f4382f0e2b --- /dev/null +++ b/boost/compute/type_traits/is_vector_type.hpp @@ -0,0 +1,38 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_TYPE_TRAITS_IS_VECTOR_TYPE_HPP +#define BOOST_COMPUTE_TYPE_TRAITS_IS_VECTOR_TYPE_HPP + +#include <boost/mpl/bool.hpp> + +#include <boost/compute/type_traits/vector_size.hpp> + +namespace boost { +namespace compute { + +/// Meta-function returning \c true if \p T is a vector type. +/// +/// For example, +/// \code +/// is_vector_type<int>::value == false +/// is_vector_type<float4_>::value == true +/// \endcode +/// +/// \see make_vector_type, vector_size +template<class T> +struct is_vector_type : boost::mpl::bool_<vector_size<T>::value != 1> +{ +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_TYPE_TRAITS_IS_VECTOR_TYPE_HPP diff --git a/boost/compute/type_traits/make_vector_type.hpp b/boost/compute/type_traits/make_vector_type.hpp new file mode 100644 index 0000000000..6494ff267f --- /dev/null +++ b/boost/compute/type_traits/make_vector_type.hpp @@ -0,0 +1,71 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_TYPE_TRAITS_MAKE_VECTOR_TYPE_HPP +#define BOOST_COMPUTE_TYPE_TRAITS_MAKE_VECTOR_TYPE_HPP + +#include <boost/preprocessor/cat.hpp> + +#include <boost/compute/types/fundamental.hpp> + +namespace boost { +namespace compute { + +/// Meta-function which returns a vector type for \p Scalar with \p Size. +/// +/// For example, +/// \code +/// make_vector_type<int, 2>::type == int2_ +/// make_vector_type<float, 4>::type == float4_ +/// \endcode +/// +/// \see is_vector_type +template<class Scalar, size_t Size> +struct make_vector_type +{ +}; + +/// \internal_ +template<class Scalar> +struct make_vector_type<Scalar, 1> +{ + typedef Scalar type; +}; + +/// \internal_ +#define BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTION(scalar, size) \ + template<> \ + struct make_vector_type<BOOST_PP_CAT(scalar, _), size> \ + { \ + typedef BOOST_PP_CAT(BOOST_PP_CAT(scalar, size), _) type; \ + }; + +/// \internal_ +#define BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(scalar) \ + BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTION(scalar, 2) \ + BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTION(scalar, 4) \ + BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTION(scalar, 8) \ + BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTION(scalar, 16) + +BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(char) +BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(uchar) +BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(short) +BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(ushort) +BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(int) +BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(uint) +BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(long) +BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(ulong) +BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(float) +BOOST_COMPUTE_DECLARE_MAKE_VECTOR_TYPE_FUNCTIONS(double) + +} // end compute namespace +} // end boost namespace + +#endif // 
BOOST_COMPUTE_TYPE_TRAITS_MAKE_VECTOR_TYPE_HPP diff --git a/boost/compute/type_traits/result_of.hpp b/boost/compute/type_traits/result_of.hpp new file mode 100644 index 0000000000..3f475a35da --- /dev/null +++ b/boost/compute/type_traits/result_of.hpp @@ -0,0 +1,39 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_TYPE_TRAITS_RESULT_OF_HPP +#define BOOST_COMPUTE_TYPE_TRAITS_RESULT_OF_HPP + +#include <boost/utility/result_of.hpp> + +namespace boost { +namespace compute { + +/// Returns the result of \c Function when called with \c Args. +/// +/// For example, +/// \code +/// // int + int = int +/// result_of<plus(int, int)>::type == int +/// \endcode +template<class Signature> +struct result_of +{ + // the default implementation uses the TR1-style result_of protocol. note + // that we explicitly do *not* use the C++11 decltype operator as we want + // the result type as it would be on an OpenCL device, not the actual C++ + // type resulting from "invoking" the function on the host. 
+ typedef typename ::boost::tr1_result_of<Signature>::type type; +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_TYPE_TRAITS_RESULT_OF_HPP diff --git a/boost/compute/type_traits/scalar_type.hpp b/boost/compute/type_traits/scalar_type.hpp new file mode 100644 index 0000000000..c40682fd27 --- /dev/null +++ b/boost/compute/type_traits/scalar_type.hpp @@ -0,0 +1,72 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_TYPE_TRAITS_SCALAR_TYPE_HPP +#define BOOST_COMPUTE_TYPE_TRAITS_SCALAR_TYPE_HPP + +#include <boost/preprocessor/cat.hpp> + +#include <boost/compute/types/fundamental.hpp> + +namespace boost { +namespace compute { + +/// Meta-function returning the scalar type for a vector type. 
+/// +/// For example, +/// \code +/// scalar_type<float4_>::type == float +/// \endcode +template<class Vector> +struct scalar_type +{ + /// \internal_ + typedef void type; +}; + +/// \internal_ +#define BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTION(scalar) \ + template<> \ + struct scalar_type<BOOST_PP_CAT(scalar, _)> \ + { \ + typedef BOOST_PP_CAT(scalar, _) type; \ + }; + +/// \internal_ +#define BOOST_COMPUTE_DECLARE_VECTOR_SCALAR_TYPE_FUNCTION(scalar, size) \ + template<> \ + struct scalar_type<BOOST_PP_CAT(BOOST_PP_CAT(scalar, size), _)> \ + { \ + typedef BOOST_PP_CAT(scalar, _) type; \ + }; + +/// \internal_ +#define BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(scalar) \ + BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTION(scalar) \ + BOOST_COMPUTE_DECLARE_VECTOR_SCALAR_TYPE_FUNCTION(scalar, 2) \ + BOOST_COMPUTE_DECLARE_VECTOR_SCALAR_TYPE_FUNCTION(scalar, 4) \ + BOOST_COMPUTE_DECLARE_VECTOR_SCALAR_TYPE_FUNCTION(scalar, 8) \ + BOOST_COMPUTE_DECLARE_VECTOR_SCALAR_TYPE_FUNCTION(scalar, 16) + +BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(char) +BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(uchar) +BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(short) +BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(ushort) +BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(int) +BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(uint) +BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(long) +BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(ulong) +BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(float) +BOOST_COMPUTE_DECLARE_SCALAR_TYPE_FUNCTIONS(double) + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_TYPE_TRAITS_SCALAR_TYPE_HPP diff --git a/boost/compute/type_traits/type_definition.hpp b/boost/compute/type_traits/type_definition.hpp new file mode 100644 index 0000000000..de9095fbd2 --- /dev/null +++ b/boost/compute/type_traits/type_definition.hpp @@ -0,0 +1,39 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz 
namespace boost {
namespace compute {
namespace detail {

// Declared only: each type that has an OpenCL definition supplies its own
// specialization providing a static value() function.
template<class T>
struct type_definition_trait;

} // end detail namespace

/// Returns the OpenCL type definition for \c T.
///
/// \return a string containing the type definition for \c T
///
/// \see type_name<T>()
template<class T>
inline std::string type_definition()
{
    typedef detail::type_definition_trait<T> trait;

    return trait::value();
}

} // end compute namespace
} // end boost namespace
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_TYPE_TRAITS_TYPE_NAME_HPP +#define BOOST_COMPUTE_TYPE_TRAITS_TYPE_NAME_HPP + +#include <boost/preprocessor/cat.hpp> +#include <boost/preprocessor/stringize.hpp> + +#include <boost/compute/types/fundamental.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class T> +struct type_name_trait; // declared only; the specializations below define value() + +/// \internal_ +#define BOOST_COMPUTE_DEFINE_SCALAR_TYPE_NAME_FUNCTION(type) \ + template<> \ + struct type_name_trait<BOOST_PP_CAT(type, _)> \ + { \ + static const char* value() \ + { \ + return BOOST_PP_STRINGIZE(type); \ + } \ + }; + +/// \internal_ +#define BOOST_COMPUTE_DEFINE_VECTOR_TYPE_NAME_FUNCTION(scalar, n) \ + template<> \ + struct type_name_trait<BOOST_PP_CAT(BOOST_PP_CAT(scalar, n), _)> \ + { \ + static const char* value() \ + { \ + return BOOST_PP_STRINGIZE(BOOST_PP_CAT(scalar, n)); \ + } \ + }; + +/// \internal_ +#define BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(scalar) \ + BOOST_COMPUTE_DEFINE_SCALAR_TYPE_NAME_FUNCTION(scalar) \ + BOOST_COMPUTE_DEFINE_VECTOR_TYPE_NAME_FUNCTION(scalar, 2) \ + BOOST_COMPUTE_DEFINE_VECTOR_TYPE_NAME_FUNCTION(scalar, 4) \ + BOOST_COMPUTE_DEFINE_VECTOR_TYPE_NAME_FUNCTION(scalar, 8) \ + BOOST_COMPUTE_DEFINE_VECTOR_TYPE_NAME_FUNCTION(scalar, 16) + +BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(char) +BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(uchar) +BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(short) +BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(ushort) +BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(int) +BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(uint) +BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(long) +BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(ulong) +BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(float) +BOOST_COMPUTE_DEFINE_TYPE_NAME_FUNCTIONS(double) + +/// \internal_ +#define BOOST_COMPUTE_DEFINE_BUILTIN_TYPE_NAME_FUNCTION(type) \ + template<> \ + struct type_name_trait<type> \ + { \ + static const char* value() \ + { \ + return 
#type; \ + } \ + }; + +BOOST_COMPUTE_DEFINE_BUILTIN_TYPE_NAME_FUNCTION(bool) +BOOST_COMPUTE_DEFINE_BUILTIN_TYPE_NAME_FUNCTION(char) +BOOST_COMPUTE_DEFINE_BUILTIN_TYPE_NAME_FUNCTION(void) + +} // end detail namespace + +/// Returns the OpenCL type name for the type \c T as a string. +/// +/// \return a null-terminated C string containing the type name for \c T +/// +/// For example: +/// \code +/// type_name<float>() == "float" +/// type_name<float4_>() == "float4" +/// \endcode +/// +/// \see type_definition<T>() +template<class T> +inline const char* type_name() +{ + return detail::type_name_trait<T>::value(); +} + +} // end compute namespace +} // end boost namespace + +/// Registers the OpenCL type for the C++ \p type to \p name. +/// +/// For example, the following will allow Eigen's \c Vector2f type +/// to be used with Boost.Compute algorithms and containers as the +/// built-in \c float2 type. +/// \code +/// BOOST_COMPUTE_TYPE_NAME(Eigen::Vector2f, float2) +/// \endcode +/// +/// This macro should be invoked in the global namespace (it opens namespace boost::compute itself to specialize type_name<type>()). +/// +/// \see type_name() +#define BOOST_COMPUTE_TYPE_NAME(type, name) \ + namespace boost { namespace compute { \ + template<> \ + inline const char* type_name<type>() \ + { \ + return #name; \ + }}} + +#endif // BOOST_COMPUTE_TYPE_TRAITS_TYPE_NAME_HPP diff --git a/boost/compute/type_traits/vector_size.hpp b/boost/compute/type_traits/vector_size.hpp new file mode 100644 index 0000000000..c207a6fedd --- /dev/null +++ b/boost/compute/type_traits/vector_size.hpp @@ -0,0 +1,65 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_TYPE_TRAITS_VECTOR_SIZE_HPP +#define BOOST_COMPUTE_TYPE_TRAITS_VECTOR_SIZE_HPP + +#include <boost/preprocessor/cat.hpp> + +#include <boost/compute/types/fundamental.hpp> + +namespace boost { +namespace compute { + +/// Meta-function returning the size (number of components) of a vector type +/// \p T. For scalar types (and any other type without a specialization) \c value is \c 1. +/// +/// For example, +/// \code +/// vector_size<float>::value == 1 +/// vector_size<float4_>::value == 4 +/// \endcode +template<class T> +struct vector_size +{ + /// \internal_ + BOOST_STATIC_CONSTANT(size_t, value = 1); +}; + +/// \internal_ +#define BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTION(scalar, size) \ + template<> \ + struct vector_size<BOOST_PP_CAT(BOOST_PP_CAT(scalar, size), _)> \ + { \ + BOOST_STATIC_CONSTANT(size_t, value = size); \ + }; + +/// \internal_ +#define BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(scalar) \ + BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTION(scalar, 2) \ + BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTION(scalar, 4) \ + BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTION(scalar, 8) \ + BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTION(scalar, 16) + +BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(char) +BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(uchar) +BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(short) +BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(ushort) +BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(int) +BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(uint) +BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(long) +BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(ulong) +BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(float) +BOOST_COMPUTE_DECLARE_VECTOR_SIZE_FUNCTIONS(double) + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_TYPE_TRAITS_VECTOR_SIZE_HPP diff --git a/boost/compute/types.hpp b/boost/compute/types.hpp new file mode 100644 index 0000000000..3d9120b65c --- /dev/null +++ 
b/boost/compute/types.hpp @@ -0,0 +1,24 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_TYPES_HPP +#define BOOST_COMPUTE_TYPES_HPP + +/// \file +/// +/// Meta-header to include all Boost.Compute types headers. + +#include <boost/compute/types/complex.hpp> +#include <boost/compute/types/fundamental.hpp> +#include <boost/compute/types/pair.hpp> +#include <boost/compute/types/struct.hpp> +#include <boost/compute/types/tuple.hpp> + +#endif // BOOST_COMPUTE_TYPES_HPP diff --git a/boost/compute/types/builtin.hpp b/boost/compute/types/builtin.hpp new file mode 100644 index 0000000000..dc30f584eb --- /dev/null +++ b/boost/compute/types/builtin.hpp @@ -0,0 +1,12 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +// deprecated, use <boost/compute/types/fundamental.hpp> instead +#include <boost/compute/types/fundamental.hpp> diff --git a/boost/compute/types/complex.hpp b/boost/compute/types/complex.hpp new file mode 100644 index 0000000000..1d60cef9e5 --- /dev/null +++ b/boost/compute/types/complex.hpp @@ -0,0 +1,196 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_TYPES_COMPLEX_HPP +#define BOOST_COMPUTE_TYPES_COMPLEX_HPP + +#include <complex> + +#include <boost/compute/functional.hpp> +#include <boost/compute/types/fundamental.hpp> +#include <boost/compute/type_traits/make_vector_type.hpp> +#include <boost/compute/type_traits/type_name.hpp> +#include <boost/compute/detail/meta_kernel.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class T> +meta_kernel& operator<<(meta_kernel &kernel, const std::complex<T> &x) +{ // emits "(<type name>)(real, imag)" for a std::complex<T> literal + typedef typename std::complex<T> value_type; + + kernel << "(" << type_name<value_type>() << ")" + << "(" << x.real() << ", " << x.imag() << ")"; + + return kernel; +} + +// get<N>() result type specialization for std::complex<> +template<size_t N, class T> +struct get_result_type<N, std::complex<T> > +{ + typedef T type; +}; + +// get<N>() specialization for std::complex<> (N == 0 emits ".x", N == 1 emits ".y") +template<size_t N, class Arg, class T> +inline meta_kernel& operator<<(meta_kernel &kernel, + const invoked_get<N, Arg, std::complex<T> > &expr) +{ + BOOST_STATIC_ASSERT(N < 2); + + return kernel << expr.m_arg << (N == 0 ? 
".x" : ".y"); +} + +} // end detail namespace + +// returns the real component of a complex<T> +template<class T> +struct real +{ + typedef T result_type; + + template<class Arg> + detail::invoked_get<0, Arg, std::complex<T> > + operator()(const Arg &x) const + { + return detail::invoked_get<0, Arg, std::complex<T> >(x); + } +}; + +// returns the imaginary component of a complex<T> +template<class T> +struct imag +{ + typedef T result_type; + + template<class Arg> + detail::invoked_get<1, Arg, std::complex<T> > + operator()(const Arg &x) const + { + return detail::invoked_get<1, Arg, std::complex<T> >(x); + } +}; + +namespace detail { + +template<class Arg1, class Arg2, class T> +struct invoked_complex_multiplies +{ // expression object: keeps copies of both operands until it is streamed + typedef typename std::complex<T> result_type; + + invoked_complex_multiplies(const Arg1 &x, const Arg2 &y) + : m_x(x), + m_y(y) + { + } + + Arg1 m_x; + Arg2 m_y; +}; + +template<class Arg1, class Arg2, class T> +inline meta_kernel& operator<<(meta_kernel &kernel, + const invoked_complex_multiplies<Arg1, Arg2, T> &expr) +{ // emits the real part x.x*y.x-x.y*y.y followed by the imaginary part x.y*y.x+x.x*y.y + typedef typename std::complex<T> value_type; + + kernel << "(" << type_name<value_type>() << ")" + << "(" << expr.m_x << ".x*" << expr.m_y << ".x-" + << expr.m_x << ".y*" << expr.m_y << ".y," + << expr.m_x << ".y*" << expr.m_y << ".x+" + << expr.m_x << ".x*" << expr.m_y << ".y" << ")"; + + return kernel; +} + +template<class Arg, class T> +struct invoked_complex_conj +{ // expression object: keeps a copy of the operand until it is streamed + typedef typename std::complex<T> result_type; + + invoked_complex_conj(const Arg &arg) + : m_arg(arg) + { + } + + Arg m_arg; +}; + +template<class Arg, class T> +inline meta_kernel& operator<<(meta_kernel &kernel, + const invoked_complex_conj<Arg, T> &expr) +{ // emits (arg.x, -arg.y), i.e. the ".y" component negated + typedef typename std::complex<T> value_type; + + kernel << "(" << type_name<value_type>() << ")" + << "(" << expr.m_arg << ".x" << ", -" << expr.m_arg << ".y" << ")"; + + return kernel; +} + +} // end detail namespace + +// specialization of multiplies<T> for std::complex<T> +template<class T> +class 
multiplies<std::complex<T> > : + public function<std::complex<T> (std::complex<T>, std::complex<T>)> +{ +public: + multiplies() : + function< + std::complex<T> (std::complex<T>, std::complex<T>) + >("complex_multiplies") + { + } + + template<class Arg1, class Arg2> + detail::invoked_complex_multiplies<Arg1, Arg2, T> + operator()(const Arg1 &x, const Arg2 &y) const + { + return detail::invoked_complex_multiplies<Arg1, Arg2, T>(x, y); + } +}; + +// returns the complex conjugate of a complex<T> +template<class T> +struct conj +{ + typedef typename std::complex<T> result_type; + + template<class Arg> + detail::invoked_complex_conj<Arg, T> + operator()(const Arg &x) const + { + return detail::invoked_complex_conj<Arg, T>(x); + } +}; + +namespace detail { + +// type_name() specialization for std::complex (reuses the name of make_vector_type<T, 2>::type) +template<class T> +struct type_name_trait<std::complex<T> > +{ + static const char* value() + { + typedef typename make_vector_type<T, 2>::type vector_type; + + return type_name<vector_type>(); + } +}; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_TYPES_COMPLEX_HPP diff --git a/boost/compute/types/fundamental.hpp b/boost/compute/types/fundamental.hpp new file mode 100644 index 0000000000..c1502e327e --- /dev/null +++ b/boost/compute/types/fundamental.hpp @@ -0,0 +1,172 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_TYPES_FUNDAMENTAL_HPP +#define BOOST_COMPUTE_TYPES_FUNDAMENTAL_HPP + +#include <cstring> +#include <ostream> + +#include <boost/preprocessor/cat.hpp> +#include <boost/preprocessor/comma.hpp> +#include <boost/preprocessor/repetition.hpp> +#include <boost/preprocessor/stringize.hpp> + +#include <boost/compute/cl.hpp> + +namespace boost { +namespace compute { + +// scalar data types (typedefs of the corresponding cl_* types) +typedef cl_char char_; +typedef cl_uchar uchar_; +typedef cl_short short_; +typedef cl_ushort ushort_; +typedef cl_int int_; +typedef cl_uint uint_; +typedef cl_long long_; +typedef cl_ulong ulong_; +typedef cl_float float_; +typedef cl_double double_; + +// converts uchar to ::boost::compute::uchar_ +#define BOOST_COMPUTE_MAKE_SCALAR_TYPE(scalar) \ + BOOST_PP_CAT(::boost::compute::scalar, _) + +// converts float, 4 to ::boost::compute::float4_ +#define BOOST_COMPUTE_MAKE_VECTOR_TYPE(scalar, size) \ + BOOST_PP_CAT(BOOST_PP_CAT(::boost::compute::scalar, size), _) + +// vector data types: vector_type<Scalar, N> wraps a plain array of N Scalar components +template<class Scalar, size_t N> +class vector_type +{ +public: + typedef Scalar scalar_type; + + vector_type() // empty body: no component is assigned + { + + } + + explicit vector_type(const Scalar scalar) // assigns scalar to every component + { + for(size_t i = 0; i < N; i++) + m_value[i] = scalar; + } + + vector_type(const vector_type<Scalar, N> &other) + { + std::memcpy(m_value, other.m_value, sizeof(m_value)); + } + + vector_type<Scalar, N>& + operator=(const vector_type<Scalar, N> &other) + { + std::memcpy(m_value, other.m_value, sizeof(m_value)); + return *this; + } + + size_t size() const + { + return N; + } + + Scalar& operator[](size_t i) // index is not range-checked + { + return m_value[i]; + } + + Scalar operator[](size_t i) const // returns the component by value + { + return m_value[i]; + } + + bool operator==(const vector_type<Scalar, N> &other) const // byte-wise comparison via std::memcmp + { + return std::memcmp(m_value, other.m_value, sizeof(m_value)) == 0; + } + + bool operator!=(const vector_type<Scalar, N> &other) const + { + return !(*this == other); + } 
+ +protected: + scalar_type m_value[N]; // the N components +}; + +#define BOOST_COMPUTE_VECTOR_TYPE_CTOR_ARG_FUNCTION(z, i, _) \ + BOOST_PP_COMMA_IF(i) scalar_type BOOST_PP_CAT(arg, i) +#define BOOST_COMPUTE_VECTOR_TYPE_DECLARE_CTOR_ARGS(scalar, size) \ + BOOST_PP_REPEAT(size, BOOST_COMPUTE_VECTOR_TYPE_CTOR_ARG_FUNCTION, _) +#define BOOST_COMPUTE_VECTOR_TYPE_ASSIGN_CTOR_ARG(z, i, _) \ + m_value[i] = BOOST_PP_CAT(arg, i); +#define BOOST_COMPUTE_VECTOR_TYPE_ASSIGN_CTOR_SINGLE_ARG(z, i, _) \ + m_value[i] = arg; + +#define BOOST_COMPUTE_DECLARE_VECTOR_TYPE_CLASS(cl_scalar, size, class_name) \ + class class_name : public vector_type<cl_scalar, size> \ + { \ + public: \ + class_name() { } \ + explicit class_name( scalar_type arg ) \ + { \ + BOOST_PP_REPEAT(size, BOOST_COMPUTE_VECTOR_TYPE_ASSIGN_CTOR_SINGLE_ARG, _) \ + } \ + class_name( \ + BOOST_PP_REPEAT(size, BOOST_COMPUTE_VECTOR_TYPE_CTOR_ARG_FUNCTION, _) \ + ) \ + { \ + BOOST_PP_REPEAT(size, BOOST_COMPUTE_VECTOR_TYPE_ASSIGN_CTOR_ARG, _) \ + } \ + }; + +#define BOOST_COMPUTE_DECLARE_VECTOR_TYPE(scalar, size) \ + BOOST_COMPUTE_DECLARE_VECTOR_TYPE_CLASS(BOOST_PP_CAT(cl_, scalar), \ + size, \ + BOOST_PP_CAT(BOOST_PP_CAT(scalar, size), _)) \ + \ + inline std::ostream& operator<<( \ + std::ostream &s, \ + const BOOST_COMPUTE_MAKE_VECTOR_TYPE(scalar, size) &v) \ + { \ + s << BOOST_PP_STRINGIZE(BOOST_PP_CAT(scalar, size)) << "("; \ + for(size_t i = 0; i < size; i++){\ + s << v[i]; \ + if(i != size - 1){\ + s << ", "; \ + } \ + } \ + s << ")"; \ + return s; \ + } + +#define BOOST_COMPUTE_DECLARE_VECTOR_TYPES(scalar) \ + BOOST_COMPUTE_DECLARE_VECTOR_TYPE(scalar, 2) \ + BOOST_COMPUTE_DECLARE_VECTOR_TYPE(scalar, 4) \ + BOOST_COMPUTE_DECLARE_VECTOR_TYPE(scalar, 8) \ + BOOST_COMPUTE_DECLARE_VECTOR_TYPE(scalar, 16) \ + +BOOST_COMPUTE_DECLARE_VECTOR_TYPES(char) +BOOST_COMPUTE_DECLARE_VECTOR_TYPES(uchar) +BOOST_COMPUTE_DECLARE_VECTOR_TYPES(short) +BOOST_COMPUTE_DECLARE_VECTOR_TYPES(ushort) +BOOST_COMPUTE_DECLARE_VECTOR_TYPES(int) 
+BOOST_COMPUTE_DECLARE_VECTOR_TYPES(uint) +BOOST_COMPUTE_DECLARE_VECTOR_TYPES(long) +BOOST_COMPUTE_DECLARE_VECTOR_TYPES(ulong) +BOOST_COMPUTE_DECLARE_VECTOR_TYPES(float) +BOOST_COMPUTE_DECLARE_VECTOR_TYPES(double) + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_TYPES_FUNDAMENTAL_HPP diff --git a/boost/compute/types/pair.hpp b/boost/compute/types/pair.hpp new file mode 100644 index 0000000000..96db6539f1 --- /dev/null +++ b/boost/compute/types/pair.hpp @@ -0,0 +1,117 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_TYPES_PAIR_HPP +#define BOOST_COMPUTE_TYPES_PAIR_HPP + +#include <string> +#include <utility> + +#include <boost/compute/functional/get.hpp> +#include <boost/compute/type_traits/type_definition.hpp> +#include <boost/compute/type_traits/type_name.hpp> +#include <boost/compute/detail/meta_kernel.hpp> + +namespace boost { +namespace compute { +namespace detail { + +// meta_kernel operator for std::pair literals (emits "(<pair type name>){first, second}") +template<class T1, class T2> +inline meta_kernel& +operator<<(meta_kernel &kernel, const std::pair<T1, T2> &x) +{ + kernel << "(" << type_name<std::pair<T1, T2> >() << ")" + << "{" << kernel.make_lit(x.first) << ", " + << kernel.make_lit(x.second) << "}"; + + return kernel; +} + +// inject_type() specialization for std::pair (injects T1 and T2 before declaring the pair type) +template<class T1, class T2> +struct inject_type_impl<std::pair<T1, T2> > +{ + void operator()(meta_kernel &kernel) + { + typedef std::pair<T1, T2> pair_type; + + kernel.inject_type<T1>(); + kernel.inject_type<T2>(); + + 
kernel.add_type_declaration<pair_type>(type_definition<pair_type>()); + } +}; + +// get<N>() result type specialization for std::pair<> +template<class T1, class T2> +struct get_result_type<0, std::pair<T1, T2> > +{ + typedef T1 type; +}; + +template<class T1, class T2> +struct get_result_type<1, std::pair<T1, T2> > +{ + typedef T2 type; +}; + +// get<N>() specialization for std::pair<> (injects the pair type, then emits ".first" for N == 0 and ".second" otherwise) +template<size_t N, class Arg, class T1, class T2> +inline meta_kernel& operator<<(meta_kernel &kernel, + const invoked_get<N, Arg, std::pair<T1, T2> > &expr) +{ + kernel.inject_type<std::pair<T1, T2> >(); + + return kernel << expr.m_arg << (N == 0 ? ".first" : ".second"); +} + +} // end detail namespace + +namespace detail { + +// type_name() specialization for std::pair ("_pair_<T1 name>_<T2 name>_t", built once into a function-local static) +template<class T1, class T2> +struct type_name_trait<std::pair<T1, T2> > +{ + static const char* value() + { + static std::string name = + std::string("_pair_") + + type_name<T1>() + "_" + type_name<T2>() + + "_t"; + + return name.c_str(); + } +}; + +// type_definition() specialization for std::pair -- NOTE(review): uses std::stringstream although <sstream> is not included directly by this header; presumably it arrives transitively +template<class T1, class T2> +struct type_definition_trait<std::pair<T1, T2> > +{ + static std::string value() + { + typedef std::pair<T1, T2> pair_type; + + std::stringstream declaration; + declaration << "typedef struct {\n" + << " " << type_name<T1>() << " first;\n" + << " " << type_name<T2>() << " second;\n" + << "} " << type_name<pair_type>() << ";\n"; + + return declaration.str(); + } +}; + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_TYPES_PAIR_HPP diff --git a/boost/compute/types/struct.hpp b/boost/compute/types/struct.hpp new file mode 100644 index 0000000000..92aeaedf22 --- /dev/null +++ b/boost/compute/types/struct.hpp @@ -0,0 +1,173 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// 
See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_TYPES_STRUCT_HPP +#define BOOST_COMPUTE_TYPES_STRUCT_HPP + +#include <sstream> + +#include <boost/static_assert.hpp> + +#include <boost/preprocessor/expr_if.hpp> +#include <boost/preprocessor/stringize.hpp> +#include <boost/preprocessor/seq/fold_left.hpp> +#include <boost/preprocessor/seq/for_each.hpp> // NOTE(review): BOOST_PP_SEQ_FOR_EACH_I is also used by BOOST_COMPUTE_ADAPT_STRUCT; confirm another include provides seq/for_each_i.hpp +#include <boost/preprocessor/seq/transform.hpp> + +#include <boost/compute/type_traits/type_definition.hpp> +#include <boost/compute/type_traits/type_name.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/detail/variadic_macros.hpp> + +namespace boost { +namespace compute { +namespace detail { + +template<class Struct, class T> +inline std::string adapt_struct_insert_member(T Struct::*, const char *name) // the unnamed member pointer only serves to deduce T +{ + std::stringstream s; + s << " " << type_name<T>() << " " << name << ";\n"; + return s.str(); +} + + +template<class Struct, class T, int N> +inline std::string adapt_struct_insert_member(T (Struct::*)[N], const char *name) // array member: also emits the extent "[N]" +{ + std::stringstream s; + s << " " << type_name<T>() << " " << name << "[" << N << "]" << ";\n"; + return s.str(); +} + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +/// \internal_ +#define BOOST_COMPUTE_DETAIL_ADAPT_STRUCT_INSERT_MEMBER(r, type, member) \ + << ::boost::compute::detail::adapt_struct_insert_member( \ + &type::member, BOOST_PP_STRINGIZE(member) \ + ) + +/// \internal_ +#define BOOST_COMPUTE_DETAIL_ADAPT_STRUCT_STREAM_MEMBER(r, data, i, elem) \ + BOOST_PP_EXPR_IF(i, << ", ") << data.elem + +/// \internal_ +#define BOOST_COMPUTE_DETAIL_STRUCT_MEMBER_SIZE(s, struct_, member_) \ + sizeof(((struct_ *)0)->member_) + +/// \internal_ +#define BOOST_COMPUTE_DETAIL_STRUCT_MEMBER_SIZE_ADD(s, x, y) (x+y) + +/// 
\internal_ +#define BOOST_COMPUTE_DETAIL_STRUCT_MEMBER_SIZE_SUM(struct_, members_) \ + BOOST_PP_SEQ_FOLD_LEFT( \ + BOOST_COMPUTE_DETAIL_STRUCT_MEMBER_SIZE_ADD, \ + 0, \ + BOOST_PP_SEQ_TRANSFORM( \ + BOOST_COMPUTE_DETAIL_STRUCT_MEMBER_SIZE, struct_, members_ \ + ) \ + ) + +/// \internal_ +/// +/// Returns true if sizeof(struct_) equals the sum of the sizes of the listed members, i.e. struct_ contains no padding bytes (it is +/// packed). members_ is a sequence of the names of the struct members. +#define BOOST_COMPUTE_DETAIL_STRUCT_IS_PACKED(struct_, members_) \ + (sizeof(struct_) == BOOST_COMPUTE_DETAIL_STRUCT_MEMBER_SIZE_SUM(struct_, members_)) + +/// The BOOST_COMPUTE_ADAPT_STRUCT() macro makes a C++ struct/class available +/// to OpenCL kernels. +/// +/// \param type The C++ type. +/// \param name The OpenCL name. +/// \param members A parenthesized tuple of the names of the struct's members, e.g. (x, y, dx, dy). +/// +/// For example, to adapt a 2D particle struct with position (x, y) and +/// velocity (dx, dy): +/// \code +/// // c++ struct definition +/// struct Particle +/// { +/// float x, y; +/// float dx, dy; +/// }; +/// +/// // adapt struct for OpenCL +/// BOOST_COMPUTE_ADAPT_STRUCT(Particle, Particle, (x, y, dx, dy)) +/// \endcode +/// +/// After adapting the struct it can be used in Boost.Compute containers +/// and with Boost.Compute algorithms: +/// \code +/// // create vector of particles +/// boost::compute::vector<Particle> particles = ... +/// +/// // function to compare particles by their x-coordinate +/// BOOST_COMPUTE_FUNCTION(bool, sort_by_x, (Particle a, Particle b), +/// { +/// return a.x < b.x; +/// }); +/// +/// // sort particles by their x-coordinate +/// boost::compute::sort( +/// particles.begin(), particles.end(), sort_by_x, queue +/// ); +/// \endcode +/// +/// Due to differences in struct padding between the host compiler and the +/// device compiler, the \c BOOST_COMPUTE_ADAPT_STRUCT() macro requires that +/// the adapted struct is packed (i.e. no padding bytes between or after the listed members). 
+/// +/// \see type_name() +#define BOOST_COMPUTE_ADAPT_STRUCT(type, name, members) \ + BOOST_STATIC_ASSERT_MSG( \ + BOOST_COMPUTE_DETAIL_STRUCT_IS_PACKED(type, BOOST_COMPUTE_PP_TUPLE_TO_SEQ(members)), \ + "BOOST_COMPUTE_ADAPT_STRUCT() does not support structs with internal padding." \ + ); \ + BOOST_COMPUTE_TYPE_NAME(type, name) \ + namespace boost { namespace compute { \ + template<> \ + inline std::string type_definition<type>() \ + { \ + std::stringstream declaration; \ + declaration << "typedef struct __attribute__((packed)) {\n" \ + BOOST_PP_SEQ_FOR_EACH( \ + BOOST_COMPUTE_DETAIL_ADAPT_STRUCT_INSERT_MEMBER, \ + type, \ + BOOST_COMPUTE_PP_TUPLE_TO_SEQ(members) \ + ) \ + << "} " << type_name<type>() << ";\n"; \ + return declaration.str(); \ + } \ + namespace detail { \ + template<> \ + struct inject_type_impl<type> \ + { \ + void operator()(meta_kernel &kernel) \ + { \ + kernel.add_type_declaration<type>(type_definition<type>()); \ + } \ + }; \ + inline meta_kernel& operator<<(meta_kernel &k, type s) \ + { \ + return k << "(" << #name << "){" \ + BOOST_PP_SEQ_FOR_EACH_I( \ + BOOST_COMPUTE_DETAIL_ADAPT_STRUCT_STREAM_MEMBER, \ + s, \ + BOOST_COMPUTE_PP_TUPLE_TO_SEQ(members) \ + ) \ + << "}"; \ + } \ + }}} + +#endif // BOOST_COMPUTE_TYPES_STRUCT_HPP diff --git a/boost/compute/types/tuple.hpp b/boost/compute/types/tuple.hpp new file mode 100644 index 0000000000..095bd95448 --- /dev/null +++ b/boost/compute/types/tuple.hpp @@ -0,0 +1,220 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_TYPES_TUPLE_HPP +#define BOOST_COMPUTE_TYPES_TUPLE_HPP + +#include <string> +#include <utility> + +#include <boost/preprocessor/enum.hpp> +#include <boost/preprocessor/expr_if.hpp> +#include <boost/preprocessor/repetition.hpp> +#include <boost/tuple/tuple.hpp> + +#include <boost/compute/config.hpp> +#include <boost/compute/functional/get.hpp> +#include <boost/compute/type_traits/type_name.hpp> +#include <boost/compute/detail/meta_kernel.hpp> + +#ifndef BOOST_COMPUTE_NO_STD_TUPLE +#include <tuple> +#endif + +namespace boost { +namespace compute { +namespace detail { + +// meta_kernel operators for boost::tuple literals (one overload per arity from 1 up to, but not including, BOOST_COMPUTE_MAX_ARITY) +#define BOOST_COMPUTE_PRINT_ELEM(z, n, unused) \ + BOOST_PP_EXPR_IF(n, << ", ") \ + << kernel.make_lit(boost::get<n>(x)) + +#define BOOST_COMPUTE_PRINT_TUPLE(z, n, unused) \ +template<BOOST_PP_ENUM_PARAMS(n, class T)> \ +inline meta_kernel& \ +operator<<(meta_kernel &kernel, \ + const boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> &x) \ +{ \ + return kernel \ + << "(" \ + << type_name<boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> >() \ + << ")" \ + << "{" \ + BOOST_PP_REPEAT(n, BOOST_COMPUTE_PRINT_ELEM, ~) \ + << "}"; \ +} + +BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_PRINT_TUPLE, ~) + +#undef BOOST_COMPUTE_PRINT_TUPLE +#undef BOOST_COMPUTE_PRINT_ELEM + +// inject_type() specializations for boost::tuple (members of the generated struct are named v0, v1, ...) +#define BOOST_COMPUTE_INJECT_TYPE(z, n, unused) \ + kernel.inject_type<T ## n>(); + +#define BOOST_COMPUTE_INJECT_DECL(z, n, unused) \ + << " " << type_name<T ## n>() << " v" #n ";\n" + +#define BOOST_COMPUTE_INJECT_IMPL(z, n, unused) \ +template<BOOST_PP_ENUM_PARAMS(n, class T)> \ +struct inject_type_impl<boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> > \ +{ \ + void operator()(meta_kernel &kernel) \ + { \ + typedef boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> tuple_type; \ + BOOST_PP_REPEAT(n, BOOST_COMPUTE_INJECT_TYPE, ~) \ + 
std::stringstream declaration; \ + declaration << "typedef struct {\n" \ + BOOST_PP_REPEAT(n, BOOST_COMPUTE_INJECT_DECL, ~) \ + << "} " << type_name<tuple_type>() << ";\n"; \ + kernel.add_type_declaration<tuple_type>(declaration.str()); \ + } \ +}; + +BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_INJECT_IMPL, ~) + +#undef BOOST_COMPUTE_INJECT_IMPL +#undef BOOST_COMPUTE_INJECT_DECL +#undef BOOST_COMPUTE_INJECT_TYPE + +#ifdef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES +// type_name() specializations for boost::tuple (without variadic templates); the last element's "_" and the literal "t" form the "_t" suffix +#define BOOST_COMPUTE_PRINT_TYPE(z, n, unused) \ + + type_name<T ## n>() + "_" + +#define BOOST_COMPUTE_PRINT_TYPE_NAME(z, n, unused) \ +template<BOOST_PP_ENUM_PARAMS(n, class T)> \ +struct type_name_trait<boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> > \ +{ \ + static const char* value() \ + { \ + static std::string name = \ + std::string("boost_tuple_") \ + BOOST_PP_REPEAT(n, BOOST_COMPUTE_PRINT_TYPE, ~) \ + "t"; \ + return name.c_str(); \ + } \ +}; + +BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_PRINT_TYPE_NAME, ~) + +#undef BOOST_COMPUTE_PRINT_TYPE_NAME +#undef BOOST_COMPUTE_PRINT_TYPE + +#else +template<size_t N, class T, class... Rest> +struct write_tuple_type_names +{ // writes the type names of the first N types of the pack, joined by "_" + void operator()(std::ostream &os) + { + os << type_name<T>() << "_"; + write_tuple_type_names<N-1, Rest...>()(os); + } +}; + +template<class T, class... Rest> +struct write_tuple_type_names<1, T, Rest...> +{ // N == 1: writes the final name without a trailing "_" + void operator()(std::ostream &os) + { + os << type_name<T>(); + } +}; + +// type_name<> specialization for boost::tuple<...> (with variadic templates) +template<class... 
T> +struct type_name_trait<boost::tuple<T...>> +{ + static const char* value() + { + static std::string str = make_type_name(); + + return str.c_str(); + } + + static std::string make_type_name() + { + typedef typename boost::tuple<T...> tuple_type; + + std::stringstream s; + s << "boost_tuple_"; + write_tuple_type_names< + boost::tuples::length<tuple_type>::value, T... + >()(s); + s << "_t"; + return s.str(); + } +}; +#endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES + +#ifndef BOOST_COMPUTE_NO_STD_TUPLE +// type_name<> specialization for std::tuple<T...> -- NOTE(review): relies on variadic templates and on write_tuple_type_names, which is only defined when BOOST_COMPUTE_NO_VARIADIC_TEMPLATES is not set; presumably config.hpp never enables std::tuple without variadic templates -- confirm +template<class... T> +struct type_name_trait<std::tuple<T...>> +{ + static const char* value() + { + static std::string str = make_type_name(); + + return str.c_str(); + } + + static std::string make_type_name() + { + typedef typename std::tuple<T...> tuple_type; + + std::stringstream s; + s << "std_tuple_"; + write_tuple_type_names< + std::tuple_size<tuple_type>::value, T... + >()(s); + s << "_t"; + return s.str(); + } +}; +#endif // BOOST_COMPUTE_NO_STD_TUPLE + +// get<N>() result type specialization for boost::tuple<> +#define BOOST_COMPUTE_GET_RESULT_TYPE(z, n, unused) \ +template<size_t N, BOOST_PP_ENUM_PARAMS(n, class T)> \ +struct get_result_type<N, boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> > \ +{ \ + typedef typename boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> T; \ + typedef typename boost::tuples::element<N, T>::type type; \ +}; + +BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_GET_RESULT_TYPE, ~) + +#undef BOOST_COMPUTE_GET_RESULT_TYPE + + +// get<N>() specialization for boost::tuple<> (injects the tuple type, then emits "<arg>.v<N>") +#define BOOST_COMPUTE_GET_N(z, n, unused) \ +template<size_t N, class Arg, BOOST_PP_ENUM_PARAMS(n, class T)> \ +inline meta_kernel& operator<<(meta_kernel &kernel, \ + const invoked_get<N, Arg, boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> > &expr) \ +{ \ + typedef typename boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> T; \ + BOOST_STATIC_ASSERT(N < size_t(boost::tuples::length<T>::value)); \ + kernel.inject_type<T>(); 
\ + return kernel << expr.m_arg << ".v" << uint_(N); \ +} + +BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_GET_N, ~) + +#undef BOOST_COMPUTE_GET_N + +} // end detail namespace +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_TYPES_TUPLE_HPP diff --git a/boost/compute/user_event.hpp b/boost/compute/user_event.hpp new file mode 100644 index 0000000000..a3fdba033e --- /dev/null +++ b/boost/compute/user_event.hpp @@ -0,0 +1,88 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_USER_EVENT_HPP +#define BOOST_COMPUTE_USER_EVENT_HPP + +#include <boost/compute/event.hpp> +#include <boost/compute/context.hpp> + +namespace boost { +namespace compute { + +#if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) +/// \class user_event +/// \brief A user-created event. +/// +/// \opencl_version_warning{1,1} +/// +/// \see event +class user_event : public event +{ +public: + /// Creates a new user-event object in \p context; throws \c opencl_error if no event is returned. + /// + /// \see_opencl_ref{clCreateUserEvent} + explicit user_event(const context &context) + { + cl_int error; + m_event = clCreateUserEvent(context.get(), &error); + if(!m_event){ + BOOST_THROW_EXCEPTION(opencl_error(error)); + } + } + + /// Creates a new user-event from \p other. + user_event(const user_event &other) + : event(other) + { + } + + /// Copies the user-event from \p other to \c *this. 
+ user_event& operator=(const user_event &other) + { + event::operator=(other); + + return *this; + } + + #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES + /// Move-constructs a new user event object from \p other. + user_event(user_event&& other) BOOST_NOEXCEPT + : event(std::move(other)) + { + } + + /// Move-assigns the user event from \p other to \c *this. + user_event& operator=(user_event&& other) BOOST_NOEXCEPT + { + event::operator=(std::move(other)); + + return *this; + } + #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES + + /// Sets the execution status for the user-event; throws \c opencl_error if the call does not return \c CL_SUCCESS. + /// + /// \see_opencl_ref{clSetUserEventStatus} + void set_status(cl_int execution_status) + { + cl_int ret = clSetUserEventStatus(m_event, execution_status); + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + } +}; +#endif // CL_VERSION_1_1 + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_USER_EVENT_HPP diff --git a/boost/compute/utility.hpp b/boost/compute/utility.hpp new file mode 100644 index 0000000000..e6d1f6eab6 --- /dev/null +++ b/boost/compute/utility.hpp @@ -0,0 +1,21 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_UTILITY_HPP +#define BOOST_COMPUTE_UTILITY_HPP + +#include <boost/compute/utility/dim.hpp> +#include <boost/compute/utility/extents.hpp> +#include <boost/compute/utility/invoke.hpp> +#include <boost/compute/utility/program_cache.hpp> +#include <boost/compute/utility/source.hpp> +#include <boost/compute/utility/wait_list.hpp> + +#endif // BOOST_COMPUTE_UTILITY_HPP diff --git a/boost/compute/utility/dim.hpp b/boost/compute/utility/dim.hpp new file mode 100644 index 0000000000..210c09cf6e --- /dev/null +++ b/boost/compute/utility/dim.hpp @@ -0,0 +1,76 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_UTILITY_DIM_HPP +#define BOOST_COMPUTE_UTILITY_DIM_HPP + +#include <boost/compute/config.hpp> +#include <boost/compute/utility/extents.hpp> + +namespace boost { +namespace compute { + +#ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES +/// The variadic \c dim() function provides a concise syntax for creating +/// \ref extents objects. +/// +/// For example, +/// \code +/// extents<2> region = dim(640, 480); // region == (640, 480) +/// \endcode +/// +/// \see \ref extents "extents<N>" +template<class... Args> +inline extents<sizeof...(Args)> dim(Args... args) +{ + return extents<sizeof...(Args)>({ static_cast<size_t>(args)... 
}); +} + +#if BOOST_WORKAROUND(BOOST_MSVC, <= 1800) +// for some inexplicable reason passing one parameter to 'dim' variadic template +// generates compile error on msvc 2013 update 4 +template<class T> +inline extents<1> dim(T arg) +{ + return extents<1>(static_cast<size_t>(arg)); +} +#endif // BOOST_WORKAROUND(BOOST_MSVC, <= 1800) + +#else +// dim() function definitions for non-c++11 compilers +#define BOOST_COMPUTE_DETAIL_ASSIGN_DIM(z, n, var) \ + var[n] = BOOST_PP_CAT(e, n); + +#define BOOST_COMPUTE_DETAIL_DEFINE_DIM(z, n, var) \ + inline extents<n> dim(BOOST_PP_ENUM_PARAMS(n, size_t e)) \ + { \ + extents<n> exts; \ + BOOST_PP_REPEAT(n, BOOST_COMPUTE_DETAIL_ASSIGN_DIM, exts) \ + return exts; \ + } + +BOOST_PP_REPEAT(BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_DETAIL_DEFINE_DIM, ~) + +#undef BOOST_COMPUTE_DETAIL_ASSIGN_DIM +#undef BOOST_COMPUTE_DETAIL_DEFINE_DIM + +#endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES + +/// \internal_ +template<size_t N> +inline extents<N> dim() +{ + return extents<N>(); +} + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_UTILITY_DIM_HPP diff --git a/boost/compute/utility/extents.hpp b/boost/compute/utility/extents.hpp new file mode 100644 index 0000000000..9666d47abc --- /dev/null +++ b/boost/compute/utility/extents.hpp @@ -0,0 +1,164 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_UTILITY_EXTENTS_HPP +#define BOOST_COMPUTE_UTILITY_EXTENTS_HPP + +#include <functional> +#include <numeric> + +#include <boost/compute/config.hpp> + +#ifndef BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST +#include <initializer_list> +#endif + +#include <boost/array.hpp> + +namespace boost { +namespace compute { + +/// The extents class contains an array of n-dimensional extents. +/// +/// \see dim() +template<size_t N> +class extents +{ +public: + typedef size_t size_type; + static const size_type static_size = N; + typedef boost::array<size_t, N> array_type; + typedef typename array_type::iterator iterator; + typedef typename array_type::const_iterator const_iterator; + + /// Creates an extents object with each component set to zero. + /// + /// For example: + /// \code + /// extents<3> exts(); // (0, 0, 0) + /// \endcode + extents() + { + m_extents.fill(0); + } + + /// Creates an extents object with each component set to \p value. + /// + /// For example: + /// \code + /// extents<3> exts(1); // (1, 1, 1) + /// \endcode + explicit extents(size_t value) + { + m_extents.fill(value); + } + + #ifndef BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST + /// Creates an extents object with \p values. + extents(std::initializer_list<size_t> values) + { + BOOST_ASSERT(values.size() == N); + + std::copy(values.begin(), values.end(), m_extents.begin()); + } + #endif // BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST + + /// Returns the size (i.e. dimensionality) of the extents array. + size_type size() const + { + return N; + } + + /// Returns the linear size of the extents. This is equivalent to the + /// product of each extent in each dimension. + size_type linear() const + { + return std::accumulate( + m_extents.begin(), m_extents.end(), 1, std::multiplies<size_type>() + ); + } + + /// Returns a pointer to the extents data array. 
+ /// + /// This is useful for passing the extents data to OpenCL APIs which + /// expect an array of \c size_t. + size_t* data() + { + return m_extents.data(); + } + + /// \overload + const size_t* data() const + { + return m_extents.data(); + } + + iterator begin() + { + return m_extents.begin(); + } + + const_iterator begin() const + { + return m_extents.begin(); + } + + const_iterator cbegin() const + { + return m_extents.cbegin(); + } + + iterator end() + { + return m_extents.end(); + } + + const_iterator end() const + { + return m_extents.end(); + } + + const_iterator cend() const + { + return m_extents.cend(); + } + + /// Returns a reference to the extent at \p index. + size_t& operator[](size_t index) + { + return m_extents[index]; + } + + /// \overload + const size_t& operator[](size_t index) const + { + return m_extents[index]; + } + + /// Returns \c true if the extents in \c *this are the same as \p other. + bool operator==(const extents &other) const + { + return m_extents == other.m_extents; + } + + /// Returns \c true if the extents in \c *this are not the same as \p other. + bool operator!=(const extents &other) const + { + return m_extents != other.m_extents; + } + +private: + array_type m_extents; +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_UTILITY_EXTENTS_HPP diff --git a/boost/compute/utility/invoke.hpp b/boost/compute/utility/invoke.hpp new file mode 100644 index 0000000000..b03162aa2b --- /dev/null +++ b/boost/compute/utility/invoke.hpp @@ -0,0 +1,71 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://kylelutz.github.com/compute for more information. 
+//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_UTILITY_INVOKE_HPP +#define BOOST_COMPUTE_UTILITY_INVOKE_HPP + +#include <boost/preprocessor/enum.hpp> +#include <boost/preprocessor/repetition.hpp> + +#include <boost/compute/config.hpp> +#include <boost/compute/command_queue.hpp> +#include <boost/compute/detail/meta_kernel.hpp> +#include <boost/compute/container/detail/scalar.hpp> +#include <boost/compute/type_traits/result_of.hpp> + +namespace boost { +namespace compute { + +#define BOOST_COMPUTE_DETAIL_INVOKE_ARG(z, n, unused) \ + BOOST_PP_COMMA_IF(n) k.var<BOOST_PP_CAT(T, n)>("arg" BOOST_PP_STRINGIZE(n)) + +#define BOOST_COMPUTE_DETAIL_INVOKE_ADD_ARG(z, n, unused) \ + k.add_set_arg("arg" BOOST_PP_STRINGIZE(n), BOOST_PP_CAT(arg, n)); + +#define BOOST_COMPUTE_DETAIL_DEFINE_INVOKE(z, n, unused) \ +template<class Function, BOOST_PP_ENUM_PARAMS(n, class T)> \ +inline typename result_of<Function(BOOST_PP_ENUM_PARAMS(n, T))>::type \ +invoke(const Function& function, command_queue& queue, BOOST_PP_ENUM_BINARY_PARAMS(n, const T, &arg)) \ +{ \ + typedef typename result_of<Function(BOOST_PP_ENUM_PARAMS(n, T))>::type result_type; \ + detail::meta_kernel k("invoke"); \ + detail::scalar<result_type> result(queue.get_context()); \ + const size_t result_arg = k.add_arg<result_type *>(memory_object::global_memory, "result"); \ + BOOST_PP_REPEAT(n, BOOST_COMPUTE_DETAIL_INVOKE_ADD_ARG, ~) \ + k << "*result = " << function( \ + BOOST_PP_REPEAT(n, BOOST_COMPUTE_DETAIL_INVOKE_ARG, ~) \ + ) << ";"; \ + k.set_arg(result_arg, result.get_buffer()); \ + k.exec(queue); \ + return result.read(queue); \ +} + +BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_DETAIL_DEFINE_INVOKE, ~) + +#undef BOOST_COMPUTE_DETAIL_INVOKE_ARG +#undef BOOST_COMPUTE_DETAIL_INVOKE_ADD_ARG +#undef BOOST_COMPUTE_DETAIL_DEFINE_INVOKE + +#ifdef BOOST_COMPUTE_DOXYGEN_INVOKED +/// Invokes \p function with \p args on \p queue. 
+/// +/// For example, to invoke the builtin abs() function: +/// \code +/// int result = invoke(abs<int>(), queue, -10); // returns 10 +/// \endcode +template<class Function, class... Args> +inline typename result_of<Function(Args...)>::type +invoke(const Function& function, command_queue& queue, const Args&... args); +#endif // BOOST_COMPUTE_DOXYGEN_INVOKED + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_UTILITY_INVOKE_HPP diff --git a/boost/compute/utility/program_cache.hpp b/boost/compute/utility/program_cache.hpp new file mode 100644 index 0000000000..c80e1a3b1e --- /dev/null +++ b/boost/compute/utility/program_cache.hpp @@ -0,0 +1,172 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_UTILITY_PROGRAM_CACHE_HPP +#define BOOST_COMPUTE_UTILITY_PROGRAM_CACHE_HPP + +#include <string> +#include <utility> + +#include <boost/shared_ptr.hpp> +#include <boost/make_shared.hpp> +#include <boost/noncopyable.hpp> + +#include <boost/compute/context.hpp> +#include <boost/compute/program.hpp> +#include <boost/compute/detail/lru_cache.hpp> +#include <boost/compute/detail/global_static.hpp> + +namespace boost { +namespace compute { + +/// The program_cache class stores \ref program objects in a LRU cache. +/// +/// This class can be used to help mitigate the overhead of OpenCL's run-time +/// kernel compilation model. Commonly used programs can be stored persistently +/// in the cache and only compiled once on their first use. 
+/// +/// Program objects are stored and retreived based on a user-defined cache key +/// along with the options used to build the program (if any). +/// +/// For example, to insert a program into the cache: +/// \code +/// cache.insert("foo", foo_program); +/// \endcode +/// +/// And to retreive the program later: +/// \code +/// boost::optional<program> p = cache.get("foo"); +/// if(p){ +/// // program found in cache +/// } +/// \endcode +/// +/// \see program +class program_cache : boost::noncopyable +{ +public: + /// Creates a new program cache with space for \p capacity number of + /// program objects. + program_cache(size_t capacity) + : m_cache(capacity) + { + } + + /// Destroys the program cache. + ~program_cache() + { + } + + /// Returns the number of program objects currently stored in the cache. + size_t size() const + { + return m_cache.size(); + } + + /// Returns the total capacity of the cache. + size_t capacity() const + { + return m_cache.capacity(); + } + + /// Clears the program cache. + void clear() + { + m_cache.clear(); + } + + /// Returns the program object with \p key. Returns a null optional if no + /// program with \p key exists in the cache. + boost::optional<program> get(const std::string &key) + { + return m_cache.get(std::make_pair(key, std::string())); + } + + /// Returns the program object with \p key and \p options. Returns a null + /// optional if no program with \p key and \p options exists in the cache. + boost::optional<program> get(const std::string &key, const std::string &options) + { + return m_cache.get(std::make_pair(key, options)); + } + + /// Inserts \p program into the cache with \p key. + void insert(const std::string &key, const program &program) + { + insert(key, std::string(), program); + } + + /// Inserts \p program into the cache with \p key and \p options. 
+ void insert(const std::string &key, const std::string &options, const program &program) + { + m_cache.insert(std::make_pair(key, options), program); + } + + /// Loads the program with \p key from the cache if it exists. Otherwise + /// builds a new program with \p source and \p options, stores it in the + /// cache, and returns it. + /// + /// This is a convenience function to simplify the common pattern of + /// attempting to load a program from the cache and, if not present, + /// building the program from source and storing it in the cache. + /// + /// Equivalent to: + /// \code + /// boost::optional<program> p = get(key, options); + /// if(!p){ + /// p = program::create_with_source(source, context); + /// p->build(options); + /// insert(key, options, *p); + /// } + /// return *p; + /// \endcode + program get_or_build(const std::string &key, + const std::string &options, + const std::string &source, + const context &context) + { + boost::optional<program> p = get(key, options); + if(!p){ + p = program::build_with_source(source, context, options); + + insert(key, options, *p); + } + return *p; + } + + /// Returns the global program cache for \p context. + /// + /// This global cache is used internally by Boost.Compute to store compiled + /// program objects used by its algorithms. All Boost.Compute programs are + /// stored with a cache key beginning with \c "__boost". User programs + /// should avoid using the same prefix in order to prevent collisions. 
+ static boost::shared_ptr<program_cache> get_global_cache(const context &context) + { + typedef detail::lru_cache<cl_context, boost::shared_ptr<program_cache> > cache_map; + + BOOST_COMPUTE_DETAIL_GLOBAL_STATIC(cache_map, caches, (8)); + + boost::optional<boost::shared_ptr<program_cache> > cache = caches.get(context.get()); + if(!cache){ + cache = boost::make_shared<program_cache>(64); + + caches.insert(context.get(), *cache); + } + + return *cache; + } + +private: + detail::lru_cache<std::pair<std::string, std::string>, program> m_cache; +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_UTILITY_PROGRAM_CACHE_HPP diff --git a/boost/compute/utility/source.hpp b/boost/compute/utility/source.hpp new file mode 100644 index 0000000000..3b073fbc82 --- /dev/null +++ b/boost/compute/utility/source.hpp @@ -0,0 +1,39 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_UTILITY_SOURCE_HPP +#define BOOST_COMPUTE_UTILITY_SOURCE_HPP + +/// Stringizes OpenCL source code. 
///
/// For example, to create a simple kernel which squares each input value:
/// \code
/// const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE(
///     __kernel void square(__global const float *input, __global float *output)
///     {
///         const uint i = get_global_id(0);
///         const float x = input[i];
///         output[i] = x * x;
///     }
/// );
///
/// // create and build square program
/// program square_program = program::build_with_source(source, context);
///
/// // create square kernel
/// kernel square_kernel(square_program, "square");
/// \endcode
///
/// The source is converted to a string literal with the preprocessor's
/// \c # operator. Runs of whitespace (including newlines) between tokens
/// are therefore collapsed to a single space, and preprocessor directives
/// must not appear inside the macro argument.
#ifdef BOOST_COMPUTE_DOXYGEN_INVOKED
#define BOOST_COMPUTE_STRINGIZE_SOURCE(source)
#else
#define BOOST_COMPUTE_STRINGIZE_SOURCE(...) #__VA_ARGS__
#endif
+/// +/// This class also provides convenience fnuctions for interacting with +/// OpenCL APIs which typically accept event dependencies as a \c cl_event* +/// pointer and a \c cl_uint size. For example: +/// \code +/// wait_list events = ...; +/// +/// clEnqueueNDRangeKernel(..., events.get_event_ptr(), events.size(), ...); +/// \endcode +/// +/// \see event, \ref future "future<T>" +class wait_list +{ +public: + typedef std::vector<event>::iterator iterator; + typedef std::vector<event>::const_iterator const_iterator; + + /// Creates an empty wait-list. + wait_list() + { + } + + /// Creates a wait-list containing \p event. + wait_list(const event &event) + { + insert(event); + } + + /// Creates a new wait-list as a copy of \p other. + wait_list(const wait_list &other) + : m_events(other.m_events) + { + } + + /// Copies the events in the wait-list from \p other. + wait_list& operator=(const wait_list &other) + { + if(this != &other){ + m_events = other.m_events; + } + + return *this; + } + + #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES + /// Move-constructs a new wait list object from \p other. + wait_list(wait_list&& other) + : m_events(std::move(other.m_events)) + { + } + + /// Move-assigns the wait list from \p other to \c *this. + wait_list& operator=(wait_list&& other) + { + m_events = std::move(other.m_events); + + return *this; + } + #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES + + /// Destroys the wait-list. + ~wait_list() + { + } + + /// Returns \c true if the wait-list is empty. + bool empty() const + { + return m_events.empty(); + } + + /// Returns the number of events in the wait-list. + uint_ size() const + { + return static_cast<uint_>(m_events.size()); + } + + /// Removes all of the events from the wait-list. + void clear() + { + m_events.clear(); + } + + /// Returns a cl_event pointer to the first event in the wait-list. + /// Returns \c 0 if the wait-list is empty. 
+ /// + /// This can be used to pass the wait-list to OpenCL functions which + /// expect a \c cl_event pointer to refer to a list of events. + const cl_event* get_event_ptr() const + { + if(empty()){ + return 0; + } + + return reinterpret_cast<const cl_event *>(&m_events[0]); + } + + /// Reserves a minimum length of storage for the wait list object. + void reserve(size_t new_capacity) { + m_events.reserve(new_capacity); + } + + /// Inserts \p event into the wait-list. + void insert(const event &event) + { + m_events.push_back(event); + } + + /// Inserts the event from \p future into the wait-list. + template<class T> + void insert(const future<T> &future) + { + insert(future.get_event()); + } + + /// Blocks until all of the events in the wait-list have completed. + /// + /// Does nothing if the wait-list is empty. + void wait() const + { + if(!empty()){ + BOOST_COMPUTE_ASSERT_CL_SUCCESS( + clWaitForEvents(size(), get_event_ptr()) + ); + } + } + + /// Returns a reference to the event at specified location \p pos. + const event& operator[](size_t pos) const { + return m_events[pos]; + } + + /// Returns a reference to the event at specified location \p pos. + event& operator[](size_t pos) { + return m_events[pos]; + } + + /// Returns an iterator to the first element of the wait-list. + iterator begin() { + return m_events.begin(); + } + + /// Returns an iterator to the first element of the wait-list. + const_iterator begin() const { + return m_events.begin(); + } + + /// Returns an iterator to the first element of the wait-list. + const_iterator cbegin() const { + return m_events.begin(); + } + + /// Returns an iterator to the element following the last element of the wait-list. + iterator end() { + return m_events.end(); + } + + /// Returns an iterator to the element following the last element of the wait-list. + const_iterator end() const { + return m_events.end(); + } + + /// Returns an iterator to the element following the last element of the wait-list. 
+ const_iterator cend() const { + return m_events.end(); + } + +private: + std::vector<event> m_events; +}; + +} // end compute namespace +} // end boost namespace + +#endif // BOOST_COMPUTE_UTILITY_WAIT_LIST_HPP diff --git a/boost/compute/version.hpp b/boost/compute/version.hpp new file mode 100644 index 0000000000..fdf6caeeaf --- /dev/null +++ b/boost/compute/version.hpp @@ -0,0 +1,18 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2015 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +#ifndef BOOST_COMPUTE_VERSION_HPP +#define BOOST_COMPUTE_VERSION_HPP + +#define BOOST_COMPUTE_VERSION_MAJOR 0 +#define BOOST_COMPUTE_VERSION_MINOR 5 +#define BOOST_COMPUTE_VERSION_PATCH 0 + +#endif // BOOST_COMPUTE_VERSION_HPP diff --git a/boost/compute/wait_list.hpp b/boost/compute/wait_list.hpp new file mode 100644 index 0000000000..a6bda45991 --- /dev/null +++ b/boost/compute/wait_list.hpp @@ -0,0 +1,12 @@ +//---------------------------------------------------------------------------// +// Copyright (c) 2013-2014 Kyle Lutz <kyle.r.lutz@gmail.com> +// +// Distributed under the Boost Software License, Version 1.0 +// See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt +// +// See http://boostorg.github.com/compute for more information. +//---------------------------------------------------------------------------// + +// deprecated, use <boost/compute/utility/wait_list.hpp> instead +#include <boost/compute/utility/wait_list.hpp> |