summaryrefslogtreecommitdiff
path: root/boost/compute
diff options
context:
space:
mode:
authorDongHun Kwak <dh0128.kwak@samsung.com>2017-09-13 11:24:46 +0900
committerDongHun Kwak <dh0128.kwak@samsung.com>2017-09-13 11:25:39 +0900
commit4fadd968fa12130524c8380f33fcfe25d4de79e5 (patch)
treefd26a490cd15388d42fc6652b3c5c13012e7f93e /boost/compute
parentb5c87084afaef42b2d058f68091be31988a6a874 (diff)
downloadboost-4fadd968fa12130524c8380f33fcfe25d4de79e5.tar.gz
boost-4fadd968fa12130524c8380f33fcfe25d4de79e5.tar.bz2
boost-4fadd968fa12130524c8380f33fcfe25d4de79e5.zip
Imported Upstream version 1.65.0upstream/1.65.0
Change-Id: Icf8400b375482cb11bcf77440a6934ba360d6ba4 Signed-off-by: DongHun Kwak <dh0128.kwak@samsung.com>
Diffstat (limited to 'boost/compute')
-rw-r--r--boost/compute/algorithm/accumulate.hpp4
-rw-r--r--boost/compute/algorithm/adjacent_difference.hpp3
-rw-r--r--boost/compute/algorithm/adjacent_find.hpp2
-rw-r--r--boost/compute/algorithm/all_of.hpp2
-rw-r--r--boost/compute/algorithm/any_of.hpp2
-rw-r--r--boost/compute/algorithm/binary_search.hpp2
-rw-r--r--boost/compute/algorithm/copy.hpp2
-rw-r--r--boost/compute/algorithm/copy_if.hpp2
-rw-r--r--boost/compute/algorithm/copy_n.hpp2
-rw-r--r--boost/compute/algorithm/count.hpp3
-rw-r--r--boost/compute/algorithm/count_if.hpp3
-rw-r--r--boost/compute/algorithm/detail/copy_on_device.hpp4
-rw-r--r--boost/compute/algorithm/detail/copy_to_device.hpp4
-rw-r--r--boost/compute/algorithm/detail/copy_to_host.hpp4
-rw-r--r--boost/compute/algorithm/detail/find_extrema.hpp2
-rw-r--r--boost/compute/algorithm/detail/find_extrema_with_reduce.hpp1
-rw-r--r--boost/compute/algorithm/detail/find_if_with_atomics.hpp1
-rw-r--r--boost/compute/algorithm/detail/merge_sort_on_gpu.hpp10
-rw-r--r--boost/compute/algorithm/detail/radix_sort.hpp8
-rw-r--r--boost/compute/algorithm/detail/serial_reduce.hpp1
-rw-r--r--boost/compute/algorithm/detail/serial_reduce_by_key.hpp6
-rw-r--r--boost/compute/algorithm/equal.hpp2
-rw-r--r--boost/compute/algorithm/equal_range.hpp2
-rw-r--r--boost/compute/algorithm/exclusive_scan.hpp4
-rw-r--r--boost/compute/algorithm/fill.hpp10
-rw-r--r--boost/compute/algorithm/fill_n.hpp2
-rw-r--r--boost/compute/algorithm/find.hpp2
-rw-r--r--boost/compute/algorithm/find_end.hpp6
-rw-r--r--boost/compute/algorithm/find_if.hpp2
-rw-r--r--boost/compute/algorithm/find_if_not.hpp2
-rw-r--r--boost/compute/algorithm/for_each.hpp2
-rw-r--r--boost/compute/algorithm/for_each_n.hpp2
-rw-r--r--boost/compute/algorithm/gather.hpp2
-rw-r--r--boost/compute/algorithm/generate.hpp2
-rw-r--r--boost/compute/algorithm/generate_n.hpp2
-rw-r--r--boost/compute/algorithm/includes.hpp1
-rw-r--r--boost/compute/algorithm/inclusive_scan.hpp4
-rw-r--r--boost/compute/algorithm/inner_product.hpp3
-rw-r--r--boost/compute/algorithm/inplace_merge.hpp2
-rw-r--r--boost/compute/algorithm/iota.hpp2
-rw-r--r--boost/compute/algorithm/is_partitioned.hpp2
-rw-r--r--boost/compute/algorithm/is_permutation.hpp1
-rw-r--r--boost/compute/algorithm/is_sorted.hpp2
-rw-r--r--boost/compute/algorithm/lexicographical_compare.hpp11
-rw-r--r--boost/compute/algorithm/lower_bound.hpp2
-rw-r--r--boost/compute/algorithm/max_element.hpp3
-rw-r--r--boost/compute/algorithm/merge.hpp2
-rw-r--r--boost/compute/algorithm/min_element.hpp3
-rw-r--r--boost/compute/algorithm/minmax_element.hpp3
-rw-r--r--boost/compute/algorithm/mismatch.hpp2
-rw-r--r--boost/compute/algorithm/next_permutation.hpp1
-rw-r--r--boost/compute/algorithm/none_of.hpp2
-rw-r--r--boost/compute/algorithm/nth_element.hpp2
-rw-r--r--boost/compute/algorithm/partial_sum.hpp4
-rw-r--r--boost/compute/algorithm/partition.hpp2
-rw-r--r--boost/compute/algorithm/partition_copy.hpp2
-rw-r--r--boost/compute/algorithm/partition_point.hpp2
-rw-r--r--boost/compute/algorithm/prev_permutation.hpp1
-rw-r--r--boost/compute/algorithm/random_shuffle.hpp2
-rw-r--r--boost/compute/algorithm/reduce.hpp4
-rw-r--r--boost/compute/algorithm/reduce_by_key.hpp3
-rw-r--r--boost/compute/algorithm/remove.hpp2
-rw-r--r--boost/compute/algorithm/remove_if.hpp2
-rw-r--r--boost/compute/algorithm/replace.hpp2
-rw-r--r--boost/compute/algorithm/replace_copy.hpp2
-rw-r--r--boost/compute/algorithm/reverse.hpp2
-rw-r--r--boost/compute/algorithm/reverse_copy.hpp2
-rw-r--r--boost/compute/algorithm/rotate.hpp2
-rw-r--r--boost/compute/algorithm/rotate_copy.hpp2
-rw-r--r--boost/compute/algorithm/scatter.hpp2
-rw-r--r--boost/compute/algorithm/scatter_if.hpp2
-rw-r--r--boost/compute/algorithm/search.hpp1
-rw-r--r--boost/compute/algorithm/search_n.hpp1
-rw-r--r--boost/compute/algorithm/set_difference.hpp2
-rw-r--r--boost/compute/algorithm/set_intersection.hpp2
-rw-r--r--boost/compute/algorithm/set_symmetric_difference.hpp15
-rw-r--r--boost/compute/algorithm/set_union.hpp2
-rw-r--r--boost/compute/algorithm/sort.hpp2
-rw-r--r--boost/compute/algorithm/sort_by_key.hpp2
-rw-r--r--boost/compute/algorithm/stable_partition.hpp2
-rw-r--r--boost/compute/algorithm/stable_sort.hpp2
-rw-r--r--boost/compute/algorithm/stable_sort_by_key.hpp2
-rw-r--r--boost/compute/algorithm/swap_ranges.hpp2
-rw-r--r--boost/compute/algorithm/transform.hpp2
-rw-r--r--boost/compute/algorithm/transform_if.hpp9
-rw-r--r--boost/compute/algorithm/transform_reduce.hpp3
-rw-r--r--boost/compute/algorithm/unique.hpp2
-rw-r--r--boost/compute/algorithm/unique_copy.hpp2
-rw-r--r--boost/compute/algorithm/upper_bound.hpp2
-rw-r--r--boost/compute/buffer.hpp8
-rw-r--r--boost/compute/cl.hpp48
-rw-r--r--boost/compute/command_queue.hpp59
-rw-r--r--boost/compute/container/detail/scalar.hpp5
-rw-r--r--boost/compute/container/valarray.hpp1
-rw-r--r--boost/compute/context.hpp4
-rw-r--r--boost/compute/detail/buffer_value.hpp4
-rw-r--r--boost/compute/detail/duration.hpp4
-rw-r--r--boost/compute/detail/meta_kernel.hpp4
-rw-r--r--boost/compute/detail/parameter_cache.hpp14
-rw-r--r--boost/compute/detail/path.hpp2
-rw-r--r--boost/compute/detail/read_write_single_value.hpp17
-rw-r--r--boost/compute/device.hpp30
-rw-r--r--boost/compute/event.hpp12
-rw-r--r--boost/compute/exception/opencl_error.hpp6
-rw-r--r--boost/compute/function.hpp13
-rw-r--r--boost/compute/functional/atomic.hpp2
-rw-r--r--boost/compute/image/image1d.hpp8
-rw-r--r--boost/compute/image/image2d.hpp8
-rw-r--r--boost/compute/image/image3d.hpp8
-rw-r--r--boost/compute/image/image_sampler.hpp2
-rw-r--r--boost/compute/interop/opengl/context.hpp6
-rw-r--r--boost/compute/interop/opengl/opengl_texture.hpp2
-rw-r--r--boost/compute/iterator/buffer_iterator.hpp17
-rw-r--r--boost/compute/iterator/counting_iterator.hpp6
-rw-r--r--boost/compute/iterator/function_input_iterator.hpp2
-rw-r--r--boost/compute/iterator/permutation_iterator.hpp6
-rw-r--r--boost/compute/iterator/strided_iterator.hpp10
-rw-r--r--boost/compute/iterator/transform_iterator.hpp6
-rw-r--r--boost/compute/iterator/zip_iterator.hpp4
-rw-r--r--boost/compute/kernel.hpp18
-rw-r--r--boost/compute/lambda/context.hpp35
-rw-r--r--boost/compute/lambda/functional.hpp403
-rw-r--r--boost/compute/memory/svm_ptr.hpp14
-rw-r--r--boost/compute/memory_object.hpp10
-rw-r--r--boost/compute/pipe.hpp4
-rw-r--r--boost/compute/platform.hpp4
-rw-r--r--boost/compute/program.hpp81
-rw-r--r--boost/compute/svm.hpp4
-rw-r--r--boost/compute/system.hpp8
-rw-r--r--boost/compute/type_traits/type_definition.hpp5
-rw-r--r--boost/compute/types/tuple.hpp2
-rw-r--r--boost/compute/user_event.hpp4
132 files changed, 956 insertions, 207 deletions
diff --git a/boost/compute/algorithm/accumulate.hpp b/boost/compute/algorithm/accumulate.hpp
index 328420a07c..be20bee60e 100644
--- a/boost/compute/algorithm/accumulate.hpp
+++ b/boost/compute/algorithm/accumulate.hpp
@@ -26,6 +26,7 @@ namespace boost {
namespace compute {
namespace detail {
+// Space complexity: O(1)
template<class InputIterator, class T, class BinaryFunction>
inline T generic_accumulate(InputIterator first,
InputIterator last,
@@ -155,6 +156,9 @@ inline T dispatch_accumulate(InputIterator first,
/// reduce(vec.begin(), vec.end(), &result, plus<float>()); // fast
/// \endcode
///
+/// Space complexity: \Omega(1)<br>
+/// Space complexity when optimized to \c reduce(): \Omega(n)
+///
/// \see reduce()
template<class InputIterator, class T, class BinaryFunction>
inline T accumulate(InputIterator first,
diff --git a/boost/compute/algorithm/adjacent_difference.hpp b/boost/compute/algorithm/adjacent_difference.hpp
index ef13970754..c3b0e7d191 100644
--- a/boost/compute/algorithm/adjacent_difference.hpp
+++ b/boost/compute/algorithm/adjacent_difference.hpp
@@ -64,6 +64,9 @@ dispatch_adjacent_difference(InputIterator first,
///
/// \return \c OutputIterator to the end of the result range
///
+/// Space complexity: \Omega(1)<br>
+/// Space complexity when \p result == \p first: \Omega(n)
+///
/// \see adjacent_find()
template<class InputIterator, class OutputIterator, class BinaryFunction>
inline OutputIterator
diff --git a/boost/compute/algorithm/adjacent_find.hpp b/boost/compute/algorithm/adjacent_find.hpp
index 992a01eddc..a71a817f57 100644
--- a/boost/compute/algorithm/adjacent_find.hpp
+++ b/boost/compute/algorithm/adjacent_find.hpp
@@ -114,6 +114,8 @@ adjacent_find_with_atomics(InputIterator first,
/// \return \c InputIterator to the first element which compares equal
/// to the following element. If none are equal, returns \c last.
///
+/// Space complexity: \Omega(1)
+///
/// \see find(), adjacent_difference()
template<class InputIterator, class Compare>
inline InputIterator
diff --git a/boost/compute/algorithm/all_of.hpp b/boost/compute/algorithm/all_of.hpp
index 34d7518f32..56c5809992 100644
--- a/boost/compute/algorithm/all_of.hpp
+++ b/boost/compute/algorithm/all_of.hpp
@@ -20,6 +20,8 @@ namespace compute {
/// Returns \c true if \p predicate returns \c true for all of the elements in
/// the range [\p first, \p last).
///
+/// Space complexity: \Omega(1)
+///
/// \see any_of(), none_of()
template<class InputIterator, class UnaryPredicate>
inline bool all_of(InputIterator first,
diff --git a/boost/compute/algorithm/any_of.hpp b/boost/compute/algorithm/any_of.hpp
index b07779597c..54031fbac5 100644
--- a/boost/compute/algorithm/any_of.hpp
+++ b/boost/compute/algorithm/any_of.hpp
@@ -24,6 +24,8 @@ namespace compute {
///
/// \snippet test/test_any_all_none_of.cpp any_of
///
+/// Space complexity: \Omega(1)
+///
/// \see all_of(), none_of()
template<class InputIterator, class UnaryPredicate>
inline bool any_of(InputIterator first,
diff --git a/boost/compute/algorithm/binary_search.hpp b/boost/compute/algorithm/binary_search.hpp
index 6e19498790..880f3628fb 100644
--- a/boost/compute/algorithm/binary_search.hpp
+++ b/boost/compute/algorithm/binary_search.hpp
@@ -20,6 +20,8 @@ namespace compute {
/// Returns \c true if \p value is in the sorted range [\p first,
/// \p last).
+///
+/// Space complexity: \Omega(1)
template<class InputIterator, class T>
inline bool binary_search(InputIterator first,
InputIterator last,
diff --git a/boost/compute/algorithm/copy.hpp b/boost/compute/algorithm/copy.hpp
index 7779277b82..4866726f6e 100644
--- a/boost/compute/algorithm/copy.hpp
+++ b/boost/compute/algorithm/copy.hpp
@@ -826,6 +826,8 @@ dispatch_copy(InputIterator first,
/// );
/// \endcode
///
+/// Space complexity: \Omega(1)
+///
/// \see copy_n(), copy_if(), copy_async()
template<class InputIterator, class OutputIterator>
inline OutputIterator copy(InputIterator first,
diff --git a/boost/compute/algorithm/copy_if.hpp b/boost/compute/algorithm/copy_if.hpp
index 3cd08ef293..bdedcb8536 100644
--- a/boost/compute/algorithm/copy_if.hpp
+++ b/boost/compute/algorithm/copy_if.hpp
@@ -38,6 +38,8 @@ inline OutputIterator copy_index_if(InputIterator first,
/// Copies each element in the range [\p first, \p last) for which
/// \p predicate returns \c true to the range beginning at \p result.
+///
+/// Space complexity: \Omega(2n)
template<class InputIterator, class OutputIterator, class Predicate>
inline OutputIterator copy_if(InputIterator first,
InputIterator last,
diff --git a/boost/compute/algorithm/copy_n.hpp b/boost/compute/algorithm/copy_n.hpp
index f0989edc67..5280193497 100644
--- a/boost/compute/algorithm/copy_n.hpp
+++ b/boost/compute/algorithm/copy_n.hpp
@@ -30,6 +30,8 @@ namespace compute {
/// boost::compute::copy_n(values, 4, vec.begin(), queue);
/// \endcode
///
+/// Space complexity: \Omega(1)
+///
/// \see copy()
template<class InputIterator, class Size, class OutputIterator>
inline OutputIterator copy_n(InputIterator first,
diff --git a/boost/compute/algorithm/count.hpp b/boost/compute/algorithm/count.hpp
index 140d67379f..7a502c6791 100644
--- a/boost/compute/algorithm/count.hpp
+++ b/boost/compute/algorithm/count.hpp
@@ -23,6 +23,9 @@ namespace compute {
/// Returns the number of occurrences of \p value in the range
/// [\p first, \p last).
///
+/// Space complexity on CPUs: \Omega(1)<br>
+/// Space complexity on GPUs: \Omega(n)
+///
/// \see count_if()
template<class InputIterator, class T>
inline size_t count(InputIterator first,
diff --git a/boost/compute/algorithm/count_if.hpp b/boost/compute/algorithm/count_if.hpp
index c9381ce5d4..81996dc828 100644
--- a/boost/compute/algorithm/count_if.hpp
+++ b/boost/compute/algorithm/count_if.hpp
@@ -25,6 +25,9 @@ namespace compute {
/// Returns the number of elements in the range [\p first, \p last)
/// for which \p predicate returns \c true.
+///
+/// Space complexity on CPUs: \Omega(1)<br>
+/// Space complexity on GPUs: \Omega(n)
template<class InputIterator, class Predicate>
inline size_t count_if(InputIterator first,
InputIterator last,
diff --git a/boost/compute/algorithm/detail/copy_on_device.hpp b/boost/compute/algorithm/detail/copy_on_device.hpp
index 8738c8c0b4..034b3bc212 100644
--- a/boost/compute/algorithm/detail/copy_on_device.hpp
+++ b/boost/compute/algorithm/detail/copy_on_device.hpp
@@ -144,7 +144,7 @@ inline future<OutputIterator> copy_on_device_async(InputIterator first,
return make_future(result + std::distance(first, last), event_);
}
-#ifdef CL_VERSION_2_0
+#ifdef BOOST_COMPUTE_CL_VERSION_2_0
// copy_on_device() specialization for svm_ptr
template<class T>
inline svm_ptr<T> copy_on_device(svm_ptr<T> first,
@@ -181,7 +181,7 @@ inline future<svm_ptr<T> > copy_on_device_async(svm_ptr<T> first,
return make_future(result + count, event_);
}
-#endif // CL_VERSION_2_0
+#endif // BOOST_COMPUTE_CL_VERSION_2_0
} // end detail namespace
} // end compute namespace
diff --git a/boost/compute/algorithm/detail/copy_to_device.hpp b/boost/compute/algorithm/detail/copy_to_device.hpp
index bce5975f53..8601bb20ec 100644
--- a/boost/compute/algorithm/detail/copy_to_device.hpp
+++ b/boost/compute/algorithm/detail/copy_to_device.hpp
@@ -124,7 +124,7 @@ inline future<DeviceIterator> copy_to_device_async(HostIterator first,
return make_future(result + static_cast<difference_type>(count), event_);
}
-#ifdef CL_VERSION_2_0
+#ifdef BOOST_COMPUTE_CL_VERSION_2_0
// copy_to_device() specialization for svm_ptr
template<class HostIterator, class T>
inline svm_ptr<T> copy_to_device(HostIterator first,
@@ -184,7 +184,7 @@ inline svm_ptr<T> copy_to_device_map(HostIterator first,
return result + count;
}
-#endif // CL_VERSION_2_0
+#endif // BOOST_COMPUTE_CL_VERSION_2_0
} // end detail namespace
} // end compute namespace
diff --git a/boost/compute/algorithm/detail/copy_to_host.hpp b/boost/compute/algorithm/detail/copy_to_host.hpp
index d770a996ef..89b57174fa 100644
--- a/boost/compute/algorithm/detail/copy_to_host.hpp
+++ b/boost/compute/algorithm/detail/copy_to_host.hpp
@@ -125,7 +125,7 @@ inline future<HostIterator> copy_to_host_async(DeviceIterator first,
return make_future(iterator_plus_distance(result, count), event_);
}
-#ifdef CL_VERSION_2_0
+#ifdef BOOST_COMPUTE_CL_VERSION_2_0
// copy_to_host() specialization for svm_ptr
template<class T, class HostIterator>
inline HostIterator copy_to_host(svm_ptr<T> first,
@@ -189,7 +189,7 @@ inline HostIterator copy_to_host_map(svm_ptr<T> first,
return iterator_plus_distance(result, count);
}
-#endif // CL_VERSION_2_0
+#endif // BOOST_COMPUTE_CL_VERSION_2_0
} // end detail namespace
} // end compute namespace
diff --git a/boost/compute/algorithm/detail/find_extrema.hpp b/boost/compute/algorithm/detail/find_extrema.hpp
index eef2e36c3c..734b75aa90 100644
--- a/boost/compute/algorithm/detail/find_extrema.hpp
+++ b/boost/compute/algorithm/detail/find_extrema.hpp
@@ -56,7 +56,7 @@ inline InputIterator find_extrema(InputIterator first,
// use serial method for OpenCL version 1.0 due to
// problems with atomic_cmpxchg()
- #ifndef CL_VERSION_1_1
+ #ifndef BOOST_COMPUTE_CL_VERSION_1_1
return serial_find_extrema(first, last, compare, find_minimum, queue);
#endif
diff --git a/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp b/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp
index 8f2a83c38b..515d7cc6da 100644
--- a/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp
+++ b/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp
@@ -246,6 +246,7 @@ inline void find_extrema_with_reduce(InputIterator input,
);
}
+// Space complexity: \Omega(2 * work-group-size * work-groups-per-compute-unit)
template<class InputIterator, class Compare>
InputIterator find_extrema_with_reduce(InputIterator first,
InputIterator last,
diff --git a/boost/compute/algorithm/detail/find_if_with_atomics.hpp b/boost/compute/algorithm/detail/find_if_with_atomics.hpp
index 112c34cf00..e14fd12ae0 100644
--- a/boost/compute/algorithm/detail/find_if_with_atomics.hpp
+++ b/boost/compute/algorithm/detail/find_if_with_atomics.hpp
@@ -153,6 +153,7 @@ inline InputIterator find_if_with_atomics_multiple_vpt(InputIterator first,
return first + static_cast<difference_type>(index.read(queue));
}
+// Space complexity: O(1)
template<class InputIterator, class UnaryPredicate>
inline InputIterator find_if_with_atomics(InputIterator first,
InputIterator last,
diff --git a/boost/compute/algorithm/detail/merge_sort_on_gpu.hpp b/boost/compute/algorithm/detail/merge_sort_on_gpu.hpp
index e62c6beb8d..d5e1a2d8c9 100644
--- a/boost/compute/algorithm/detail/merge_sort_on_gpu.hpp
+++ b/boost/compute/algorithm/detail/merge_sort_on_gpu.hpp
@@ -91,6 +91,7 @@ inline size_t bitonic_block_sort(KeyIterator keys_first,
command_queue &queue)
{
typedef typename std::iterator_traits<KeyIterator>::value_type key_type;
+ typedef typename std::iterator_traits<ValueIterator>::value_type value_type;
meta_kernel k("bitonic_block_sort");
size_t count_arg = k.add_arg<const uint_>("count");
@@ -249,8 +250,11 @@ inline size_t bitonic_block_sort(KeyIterator keys_first,
k.var<key_type>("my_key") << ";\n";
if(sort_by_key)
{
- k << values_first[k.var<const uint_>("gid")] << " = " <<
- values_first[k.var<const uint_>("offset + my_index")] << ";\n";
+ k <<
+ k.decl<value_type>("my_value") << " = " <<
+ values_first[k.var<const uint_>("offset + my_index")] << ";\n" <<
+ "barrier(CLK_GLOBAL_MEM_FENCE);\n" <<
+ values_first[k.var<const uint_>("gid")] << " = my_value;\n";
}
k <<
// end if
@@ -418,7 +422,7 @@ inline void merge_blocks_on_gpu(KeyIterator keys_first,
");\n" <<
"left_idx = equal ? mid_idx + 1 : left_idx + 1;\n" <<
"right_idx = equal ? right_idx : mid_idx;\n" <<
- "upper_key = equal ? upper_key : " <<
+ "upper_key = " <<
keys_first[k.var<const uint_>("left_idx")] << ";\n" <<
"}\n" <<
"}\n" <<
diff --git a/boost/compute/algorithm/detail/radix_sort.hpp b/boost/compute/algorithm/detail/radix_sort.hpp
index 8e6d5f9c0a..53b1205c70 100644
--- a/boost/compute/algorithm/detail/radix_sort.hpp
+++ b/boost/compute/algorithm/detail/radix_sort.hpp
@@ -17,6 +17,9 @@
#include <boost/type_traits/is_signed.hpp>
#include <boost/type_traits/is_floating_point.hpp>
+#include <boost/mpl/and.hpp>
+#include <boost/mpl/not.hpp>
+
#include <boost/compute/kernel.hpp>
#include <boost/compute/program.hpp>
#include <boost/compute/command_queue.hpp>
@@ -305,9 +308,12 @@ inline void radix_sort_impl(const buffer_iterator<T> first,
options << " -DASC";
}
+ // get type definition if it is a custom struct
+ std::string custom_type_def = boost::compute::type_definition<T2>() + "\n";
+
// load radix sort program
program radix_sort_program = cache->get_or_build(
- cache_key, options.str(), radix_sort_source, context
+ cache_key, options.str(), custom_type_def + radix_sort_source, context
);
kernel count_kernel(radix_sort_program, "count");
diff --git a/boost/compute/algorithm/detail/serial_reduce.hpp b/boost/compute/algorithm/detail/serial_reduce.hpp
index 53aaf140fe..8b121274b9 100644
--- a/boost/compute/algorithm/detail/serial_reduce.hpp
+++ b/boost/compute/algorithm/detail/serial_reduce.hpp
@@ -20,6 +20,7 @@ namespace boost {
namespace compute {
namespace detail {
+// Space complexity: O(1)
template<class InputIterator, class OutputIterator, class BinaryFunction>
inline void serial_reduce(InputIterator first,
InputIterator last,
diff --git a/boost/compute/algorithm/detail/serial_reduce_by_key.hpp b/boost/compute/algorithm/detail/serial_reduce_by_key.hpp
index f9bda8e476..6fb04baa6d 100644
--- a/boost/compute/algorithm/detail/serial_reduce_by_key.hpp
+++ b/boost/compute/algorithm/detail/serial_reduce_by_key.hpp
@@ -55,11 +55,9 @@ inline size_t serial_reduce_by_key(InputKeyIterator keys_first,
size_t result_size_arg = k.add_arg<uint_ *>(memory_object::global_memory,
"result_size");
- convert<result_type> to_result_type;
-
k <<
k.decl<result_type>("result") <<
- " = " << to_result_type(values_first[0]) << ";\n" <<
+ " = " << values_first[0] << ";\n" <<
k.decl<key_type>("previous_key") << " = " << keys_first[0] << ";\n" <<
k.decl<result_type>("value") << ";\n" <<
k.decl<key_type>("key") << ";\n" <<
@@ -70,7 +68,7 @@ inline size_t serial_reduce_by_key(InputKeyIterator keys_first,
values_result[0] << " = result;\n" <<
"for(ulong i = 1; i < count; i++) {\n" <<
- " value = " << to_result_type(values_first[k.var<uint_>("i")]) << ";\n" <<
+ " value = " << values_first[k.var<uint_>("i")] << ";\n" <<
" key = " << keys_first[k.var<uint_>("i")] << ";\n" <<
" if (" << predicate(k.var<key_type>("previous_key"),
k.var<key_type>("key")) << ") {\n" <<
diff --git a/boost/compute/algorithm/equal.hpp b/boost/compute/algorithm/equal.hpp
index 35d0c5f0ea..c3c8053b71 100644
--- a/boost/compute/algorithm/equal.hpp
+++ b/boost/compute/algorithm/equal.hpp
@@ -20,6 +20,8 @@ namespace compute {
/// Returns \c true if the range [\p first1, \p last1) and the range
/// beginning at \p first2 are equal.
+///
+/// Space complexity: \Omega(1)
template<class InputIterator1, class InputIterator2>
inline bool equal(InputIterator1 first1,
InputIterator1 last1,
diff --git a/boost/compute/algorithm/equal_range.hpp b/boost/compute/algorithm/equal_range.hpp
index fd82177324..d7008e3cf4 100644
--- a/boost/compute/algorithm/equal_range.hpp
+++ b/boost/compute/algorithm/equal_range.hpp
@@ -23,6 +23,8 @@ namespace compute {
/// Returns a pair of iterators containing the range of values equal
/// to \p value in the sorted range [\p first, \p last).
+///
+/// Space complexity: \Omega(1)
template<class InputIterator, class T>
inline std::pair<InputIterator, InputIterator>
equal_range(InputIterator first,
diff --git a/boost/compute/algorithm/exclusive_scan.hpp b/boost/compute/algorithm/exclusive_scan.hpp
index 205d3de658..806a172cf4 100644
--- a/boost/compute/algorithm/exclusive_scan.hpp
+++ b/boost/compute/algorithm/exclusive_scan.hpp
@@ -44,6 +44,10 @@ namespace compute {
///
/// \snippet test/test_scan.cpp exclusive_scan_int_multiplies
///
+/// Space complexity on GPUs: \Omega(n)<br>
+/// Space complexity on GPUs when \p first == \p result: \Omega(2n)<br>
+/// Space complexity on CPUs: \Omega(1)
+///
/// \see inclusive_scan()
template<class InputIterator, class OutputIterator, class T, class BinaryOperator>
inline OutputIterator
diff --git a/boost/compute/algorithm/fill.hpp b/boost/compute/algorithm/fill.hpp
index c711f46b94..646d8acda4 100644
--- a/boost/compute/algorithm/fill.hpp
+++ b/boost/compute/algorithm/fill.hpp
@@ -64,7 +64,7 @@ inline future<void> fill_async_with_copy(BufferIterator first,
);
}
-#if defined(CL_VERSION_1_2)
+#if defined(BOOST_COMPUTE_CL_VERSION_1_2)
// meta-function returning true if Iterator points to a range of values
// that can be filled using clEnqueueFillBuffer(). to meet this criteria
@@ -172,7 +172,7 @@ dispatch_fill_async(BufferIterator first,
return future<void>(event_);
}
-#ifdef CL_VERSION_2_0
+#ifdef BOOST_COMPUTE_CL_VERSION_2_0
// specializations for svm_ptr<T>
template<class T>
inline void dispatch_fill(svm_ptr<T> first,
@@ -205,7 +205,7 @@ inline future<void> dispatch_fill_async(svm_ptr<T> first,
return future<void>(event_);
}
-#endif // CL_VERSION_2_0
+#endif // BOOST_COMPUTE_CL_VERSION_2_0
// default implementations
template<class BufferIterator, class T>
@@ -251,7 +251,7 @@ inline future<void> dispatch_fill_async(BufferIterator first,
{
return fill_async_with_copy(first, count, value, queue);
}
-#endif // !defined(CL_VERSION_1_2)
+#endif // !defined(BOOST_COMPUTE_CL_VERSION_1_2)
} // end detail namespace
@@ -271,6 +271,8 @@ inline future<void> dispatch_fill_async(BufferIterator first,
/// boost::compute::fill(vec.begin(), vec.end(), 7, queue);
/// \endcode
///
+/// Space complexity: \Omega(1)
+///
/// \see boost::compute::fill_n()
template<class BufferIterator, class T>
inline void fill(BufferIterator first,
diff --git a/boost/compute/algorithm/fill_n.hpp b/boost/compute/algorithm/fill_n.hpp
index 18a8f706a5..6be2d280a6 100644
--- a/boost/compute/algorithm/fill_n.hpp
+++ b/boost/compute/algorithm/fill_n.hpp
@@ -20,6 +20,8 @@ namespace compute {
/// Fills the range [\p first, \p first + count) with \p value.
///
+/// Space complexity: \Omega(1)
+///
/// \see fill()
template<class BufferIterator, class Size, class T>
inline void fill_n(BufferIterator first,
diff --git a/boost/compute/algorithm/find.hpp b/boost/compute/algorithm/find.hpp
index ef3ebf0c47..a6225b8c99 100644
--- a/boost/compute/algorithm/find.hpp
+++ b/boost/compute/algorithm/find.hpp
@@ -22,6 +22,8 @@ namespace compute {
/// Returns an iterator pointing to the first element in the range
/// [\p first, \p last) that equals \p value.
+///
+/// Space complexity: \Omega(1)
template<class InputIterator, class T>
inline InputIterator find(InputIterator first,
InputIterator last,
diff --git a/boost/compute/algorithm/find_end.hpp b/boost/compute/algorithm/find_end.hpp
index 265a1da542..a0a1b2e8c9 100644
--- a/boost/compute/algorithm/find_end.hpp
+++ b/boost/compute/algorithm/find_end.hpp
@@ -26,8 +26,8 @@ namespace detail {
///
/// \brief Helper function for find_end
///
-/// Basically a copy of find_if which returns last occurence
-/// instead of first occurence
+/// Basically a copy of find_if which returns last occurrence
+/// instead of first occurrence
///
template<class InputIterator, class UnaryPredicate>
inline InputIterator find_end_helper(InputIterator first,
@@ -90,6 +90,8 @@ inline InputIterator find_end_helper(InputIterator first,
/// \param p_last Iterator pointing to end of pattern
/// \param queue Queue on which to execute
///
+/// Space complexity: \Omega(n)
+///
template<class TextIterator, class PatternIterator>
inline TextIterator find_end(TextIterator t_first,
TextIterator t_last,
diff --git a/boost/compute/algorithm/find_if.hpp b/boost/compute/algorithm/find_if.hpp
index db99cc0396..074b47e280 100644
--- a/boost/compute/algorithm/find_if.hpp
+++ b/boost/compute/algorithm/find_if.hpp
@@ -20,6 +20,8 @@ namespace compute {
/// Returns an iterator pointing to the first element in the range
/// [\p first, \p last) for which \p predicate returns \c true.
+///
+/// Space complexity: \Omega(1)
template<class InputIterator, class UnaryPredicate>
inline InputIterator find_if(InputIterator first,
InputIterator last,
diff --git a/boost/compute/algorithm/find_if_not.hpp b/boost/compute/algorithm/find_if_not.hpp
index 61de050d31..a008a99469 100644
--- a/boost/compute/algorithm/find_if_not.hpp
+++ b/boost/compute/algorithm/find_if_not.hpp
@@ -22,6 +22,8 @@ namespace compute {
/// Returns an iterator pointing to the first element in the range
/// [\p first, \p last) for which \p predicate returns \c false.
///
+/// Space complexity: \Omega(1)
+///
/// \see find_if()
template<class InputIterator, class UnaryPredicate>
inline InputIterator find_if_not(InputIterator first,
diff --git a/boost/compute/algorithm/for_each.hpp b/boost/compute/algorithm/for_each.hpp
index 3ed399e6e9..7afba2b5f5 100644
--- a/boost/compute/algorithm/for_each.hpp
+++ b/boost/compute/algorithm/for_each.hpp
@@ -45,6 +45,8 @@ struct for_each_kernel : public meta_kernel
/// Calls \p function on each element in the range [\p first, \p last).
///
+/// Space complexity: \Omega(1)
+///
/// \see transform()
template<class InputIterator, class UnaryFunction>
inline UnaryFunction for_each(InputIterator first,
diff --git a/boost/compute/algorithm/for_each_n.hpp b/boost/compute/algorithm/for_each_n.hpp
index d0be784bf7..77932ab209 100644
--- a/boost/compute/algorithm/for_each_n.hpp
+++ b/boost/compute/algorithm/for_each_n.hpp
@@ -19,6 +19,8 @@ namespace compute {
/// Calls \p function on each element in the range [\p first, \p first
/// \c + \p count).
///
+/// Space complexity: \Omega(1)
+///
/// \see for_each()
template<class InputIterator, class Size, class UnaryFunction>
inline UnaryFunction for_each_n(InputIterator first,
diff --git a/boost/compute/algorithm/gather.hpp b/boost/compute/algorithm/gather.hpp
index 24c5c727ae..62442587f7 100644
--- a/boost/compute/algorithm/gather.hpp
+++ b/boost/compute/algorithm/gather.hpp
@@ -62,6 +62,8 @@ private:
/// to the range beginning at \p result using the input values from the range
/// beginning at \p input.
///
+/// Space complexity: \Omega(1)
+///
/// \see scatter()
template<class InputIterator, class MapIterator, class OutputIterator>
inline void gather(MapIterator first,
diff --git a/boost/compute/algorithm/generate.hpp b/boost/compute/algorithm/generate.hpp
index c70a542683..9ac76a3dca 100644
--- a/boost/compute/algorithm/generate.hpp
+++ b/boost/compute/algorithm/generate.hpp
@@ -22,6 +22,8 @@ namespace compute {
/// Stores the result of \p generator for each element in the range
/// [\p first, \p last).
+///
+/// Space complexity: \Omega(1)
template<class OutputIterator, class Generator>
inline void generate(OutputIterator first,
OutputIterator last,
diff --git a/boost/compute/algorithm/generate_n.hpp b/boost/compute/algorithm/generate_n.hpp
index 6d8e607b64..066a831ddf 100644
--- a/boost/compute/algorithm/generate_n.hpp
+++ b/boost/compute/algorithm/generate_n.hpp
@@ -20,6 +20,8 @@ namespace compute {
/// Stores the result of \p generator for each element in the range
/// [\p first, \p first + \p count).
+///
+/// Space complexity: \Omega(1)
template<class OutputIterator, class Size, class Generator>
inline void generate_n(OutputIterator first,
Size count,
diff --git a/boost/compute/algorithm/includes.hpp b/boost/compute/algorithm/includes.hpp
index c4e7c793e7..cfef9540a7 100644
--- a/boost/compute/algorithm/includes.hpp
+++ b/boost/compute/algorithm/includes.hpp
@@ -110,6 +110,7 @@ private:
/// \param last2 Iterator pointing to end of second set
/// \param queue Queue on which to execute
///
+/// Space complexity: \Omega(distance(\p first1, \p last1) + distance(\p first2, \p last2))
template<class InputIterator1, class InputIterator2>
inline bool includes(InputIterator1 first1,
InputIterator1 last1,
diff --git a/boost/compute/algorithm/inclusive_scan.hpp b/boost/compute/algorithm/inclusive_scan.hpp
index 9f98beaf7c..84f1b8cbf7 100644
--- a/boost/compute/algorithm/inclusive_scan.hpp
+++ b/boost/compute/algorithm/inclusive_scan.hpp
@@ -42,6 +42,10 @@ namespace compute {
///
/// \snippet test/test_scan.cpp inclusive_scan_int_multiplies
///
+/// Space complexity on GPUs: \Omega(n)<br>
+/// Space complexity on GPUs when \p first == \p result: \Omega(2n)<br>
+/// Space complexity on CPUs: \Omega(1)
+///
/// \see exclusive_scan()
template<class InputIterator, class OutputIterator, class BinaryOperator>
inline OutputIterator
diff --git a/boost/compute/algorithm/inner_product.hpp b/boost/compute/algorithm/inner_product.hpp
index 614611f91e..0aeaf9110e 100644
--- a/boost/compute/algorithm/inner_product.hpp
+++ b/boost/compute/algorithm/inner_product.hpp
@@ -26,6 +26,9 @@ namespace compute {
/// Returns the inner product of the elements in the range
/// [\p first1, \p last1) with the elements in the range beginning
/// at \p first2.
+///
+/// Space complexity: \Omega(1)<br>
+/// Space complexity when binary operator is recognized as associative: \Omega(n)
template<class InputIterator1, class InputIterator2, class T>
inline T inner_product(InputIterator1 first1,
InputIterator1 last1,
diff --git a/boost/compute/algorithm/inplace_merge.hpp b/boost/compute/algorithm/inplace_merge.hpp
index 3080950df5..91f5be5335 100644
--- a/boost/compute/algorithm/inplace_merge.hpp
+++ b/boost/compute/algorithm/inplace_merge.hpp
@@ -23,6 +23,8 @@ namespace compute {
/// Merges the sorted values in the range [\p first, \p middle) with
/// the sorted values in the range [\p middle, \p last) in-place.
+///
+/// Space complexity: \Omega(n)
template<class Iterator>
inline void inplace_merge(Iterator first,
Iterator middle,
diff --git a/boost/compute/algorithm/iota.hpp b/boost/compute/algorithm/iota.hpp
index 084c3d8d97..4cd7aa9c7b 100644
--- a/boost/compute/algorithm/iota.hpp
+++ b/boost/compute/algorithm/iota.hpp
@@ -26,6 +26,8 @@ namespace compute {
/// \snippet test/test_iota.cpp iota
///
/// Will fill \c vec with the values (\c 0, \c 1, \c 2, \c ...).
+///
+/// Space complexity: \Omega(1)
template<class BufferIterator, class T>
inline void iota(BufferIterator first,
BufferIterator last,
diff --git a/boost/compute/algorithm/is_partitioned.hpp b/boost/compute/algorithm/is_partitioned.hpp
index 3916825057..6ad24f240f 100644
--- a/boost/compute/algorithm/is_partitioned.hpp
+++ b/boost/compute/algorithm/is_partitioned.hpp
@@ -21,6 +21,8 @@ namespace compute {
/// Returns \c true if the values in the range [\p first, \p last)
/// are partitioned according to \p predicate.
+///
+/// Space complexity: \Omega(1)
template<class InputIterator, class UnaryPredicate>
inline bool is_partitioned(InputIterator first,
InputIterator last,
diff --git a/boost/compute/algorithm/is_permutation.hpp b/boost/compute/algorithm/is_permutation.hpp
index 1e502efb37..88b89b7973 100644
--- a/boost/compute/algorithm/is_permutation.hpp
+++ b/boost/compute/algorithm/is_permutation.hpp
@@ -36,6 +36,7 @@ namespace compute {
/// \param last2 Iterator pointing to end of second range
/// \param queue Queue on which to execute
///
+/// Space complexity: \Omega(distance(\p first1, \p last1) + distance(\p first2, \p last2))
template<class InputIterator1, class InputIterator2>
inline bool is_permutation(InputIterator1 first1,
InputIterator1 last1,
diff --git a/boost/compute/algorithm/is_sorted.hpp b/boost/compute/algorithm/is_sorted.hpp
index a605159ac3..7441620978 100644
--- a/boost/compute/algorithm/is_sorted.hpp
+++ b/boost/compute/algorithm/is_sorted.hpp
@@ -30,6 +30,8 @@ namespace compute {
///
/// \return \c true if the range [\p first, \p last) is sorted
///
+/// Space complexity: \Omega(1)
+///
/// \see sort()
template<class InputIterator, class Compare>
inline bool is_sorted(InputIterator first,
diff --git a/boost/compute/algorithm/lexicographical_compare.hpp b/boost/compute/algorithm/lexicographical_compare.hpp
index c4f7120807..952e678a68 100644
--- a/boost/compute/algorithm/lexicographical_compare.hpp
+++ b/boost/compute/algorithm/lexicographical_compare.hpp
@@ -42,10 +42,10 @@ const char lexicographical_compare_source[] =
template<class InputIterator1, class InputIterator2>
inline bool dispatch_lexicographical_compare(InputIterator1 first1,
- InputIterator1 last1,
- InputIterator2 first2,
- InputIterator2 last2,
- command_queue &queue)
+ InputIterator1 last1,
+ InputIterator2 first2,
+ InputIterator2 last2,
+ command_queue &queue)
{
const boost::compute::context &context = queue.get_context();
@@ -103,6 +103,9 @@ inline bool dispatch_lexicographical_compare(InputIterator1 first1,
/// Checks if the first range [first1, last1) is lexicographically
/// less than the second range [first2, last2).
+///
+/// Space complexity:
+/// \Omega(max(distance(\p first1, \p last1), distance(\p first2, \p last2)))
template<class InputIterator1, class InputIterator2>
inline bool lexicographical_compare(InputIterator1 first1,
InputIterator1 last1,
diff --git a/boost/compute/algorithm/lower_bound.hpp b/boost/compute/algorithm/lower_bound.hpp
index b2011c66ef..f78bbd6364 100644
--- a/boost/compute/algorithm/lower_bound.hpp
+++ b/boost/compute/algorithm/lower_bound.hpp
@@ -22,6 +22,8 @@ namespace compute {
/// Returns an iterator pointing to the first element in the sorted
/// range [\p first, \p last) that is not less than \p value.
///
+/// Space complexity: \Omega(1)
+///
/// \see upper_bound()
template<class InputIterator, class T>
inline InputIterator
diff --git a/boost/compute/algorithm/max_element.hpp b/boost/compute/algorithm/max_element.hpp
index 55f2f7ffbf..f9df37420c 100644
--- a/boost/compute/algorithm/max_element.hpp
+++ b/boost/compute/algorithm/max_element.hpp
@@ -43,6 +43,9 @@ namespace compute {
/// boost::compute::max_element(data.begin(), data.end(), compare_first, queue);
/// \endcode
///
+/// Space complexity on CPUs: \Omega(1)<br>
+/// Space complexity on GPUs: \Omega(n)
+///
/// \see min_element()
template<class InputIterator, class Compare>
inline InputIterator
diff --git a/boost/compute/algorithm/merge.hpp b/boost/compute/algorithm/merge.hpp
index 875a283044..ff3e6e879a 100644
--- a/boost/compute/algorithm/merge.hpp
+++ b/boost/compute/algorithm/merge.hpp
@@ -37,6 +37,8 @@ namespace compute {
///
/// \return \c OutputIterator to the end of the result range
///
+/// Space complexity: \Omega(distance(\p first1, \p last1) + distance(\p first2, \p last2))
+///
/// \see inplace_merge()
template<class InputIterator1,
class InputIterator2,
diff --git a/boost/compute/algorithm/min_element.hpp b/boost/compute/algorithm/min_element.hpp
index 62744efb98..b52e2670cb 100644
--- a/boost/compute/algorithm/min_element.hpp
+++ b/boost/compute/algorithm/min_element.hpp
@@ -43,6 +43,9 @@ namespace compute {
/// boost::compute::min_element(data.begin(), data.end(), compare_first, queue);
/// \endcode
///
+/// Space complexity on CPUs: \Omega(1)<br>
+/// Space complexity on GPUs: \Omega(n)
+///
/// \see max_element()
template<class InputIterator, class Compare>
inline InputIterator
diff --git a/boost/compute/algorithm/minmax_element.hpp b/boost/compute/algorithm/minmax_element.hpp
index 3f44c09eaf..4b2aae6dee 100644
--- a/boost/compute/algorithm/minmax_element.hpp
+++ b/boost/compute/algorithm/minmax_element.hpp
@@ -31,6 +31,9 @@ namespace compute {
/// argument is less than (i.e. is ordered before) the second.
/// \param queue command queue to perform the operation
///
+/// Space complexity on CPUs: \Omega(1)<br>
+/// Space complexity on GPUs: \Omega(n)
+///
/// \see max_element(), min_element()
template<class InputIterator, class Compare>
inline std::pair<InputIterator, InputIterator>
diff --git a/boost/compute/algorithm/mismatch.hpp b/boost/compute/algorithm/mismatch.hpp
index e7db883004..ff31f49f97 100644
--- a/boost/compute/algorithm/mismatch.hpp
+++ b/boost/compute/algorithm/mismatch.hpp
@@ -28,6 +28,8 @@ namespace compute {
/// Returns a pair of iterators pointing to the first position where the
/// range [\p first1, \p last1) and the range starting at \p first2
/// differ.
+///
+/// Space complexity: \Omega(1)
template<class InputIterator1, class InputIterator2>
inline std::pair<InputIterator1, InputIterator2>
mismatch(InputIterator1 first1,
diff --git a/boost/compute/algorithm/next_permutation.hpp b/boost/compute/algorithm/next_permutation.hpp
index e81fbd2ee8..061ea1efe9 100644
--- a/boost/compute/algorithm/next_permutation.hpp
+++ b/boost/compute/algorithm/next_permutation.hpp
@@ -131,6 +131,7 @@ inline InputIterator np_ceiling(InputIterator first,
/// \param last Iterator pointing to end of range
/// \param queue Queue on which to execute
///
+/// Space complexity: \Omega(1)
template<class InputIterator>
inline bool next_permutation(InputIterator first,
InputIterator last,
diff --git a/boost/compute/algorithm/none_of.hpp b/boost/compute/algorithm/none_of.hpp
index c25dd12a87..fc3ed94bc1 100644
--- a/boost/compute/algorithm/none_of.hpp
+++ b/boost/compute/algorithm/none_of.hpp
@@ -20,6 +20,8 @@ namespace compute {
/// Returns \c true if \p predicate returns \c true for none of the elements in
/// the range [\p first, \p last).
///
+/// Space complexity: \Omega(1)
+///
/// \see all_of(), any_of()
template<class InputIterator, class UnaryPredicate>
inline bool none_of(InputIterator first,
diff --git a/boost/compute/algorithm/nth_element.hpp b/boost/compute/algorithm/nth_element.hpp
index 68f7a3dbc0..93344271dd 100644
--- a/boost/compute/algorithm/nth_element.hpp
+++ b/boost/compute/algorithm/nth_element.hpp
@@ -23,6 +23,8 @@ namespace compute {
/// Rearranges the elements in the range [\p first, \p last) such that
/// the \p nth element would be in that position in a sorted sequence.
+///
+/// Space complexity: \Omega(3n)
template<class Iterator, class Compare>
inline void nth_element(Iterator first,
Iterator nth,
diff --git a/boost/compute/algorithm/partial_sum.hpp b/boost/compute/algorithm/partial_sum.hpp
index d440369a5a..53d36a9db0 100644
--- a/boost/compute/algorithm/partial_sum.hpp
+++ b/boost/compute/algorithm/partial_sum.hpp
@@ -21,6 +21,10 @@ namespace compute {
/// Calculates the cumulative sum of the elements in the range [\p first,
/// \p last) and writes the resulting values to the range beginning at
/// \p result.
+///
+/// Space complexity on GPUs: \Omega(n)<br>
+/// Space complexity on GPUs when \p first == \p result: \Omega(2n)<br>
+/// Space complexity on CPUs: \Omega(1)
template<class InputIterator, class OutputIterator>
inline OutputIterator
partial_sum(InputIterator first,
diff --git a/boost/compute/algorithm/partition.hpp b/boost/compute/algorithm/partition.hpp
index 7860350e0d..59d0c78f7e 100644
--- a/boost/compute/algorithm/partition.hpp
+++ b/boost/compute/algorithm/partition.hpp
@@ -22,6 +22,8 @@ namespace compute {
/// Partitions the elements in the range [\p first, \p last) according to
/// \p predicate. Order of the elements need not be preserved.
///
+/// Space complexity: \Omega(3n)
+///
/// \see is_partitioned() and stable_partition()
///
template<class Iterator, class UnaryPredicate>
diff --git a/boost/compute/algorithm/partition_copy.hpp b/boost/compute/algorithm/partition_copy.hpp
index 80a2c6475f..3215ec0736 100644
--- a/boost/compute/algorithm/partition_copy.hpp
+++ b/boost/compute/algorithm/partition_copy.hpp
@@ -24,6 +24,8 @@ namespace compute {
/// and all of the elements for which \p predicate returns \c false to
/// the range beginning at \p first_false.
///
+/// Space complexity: \Omega(2n)
+///
/// \see partition()
template<class InputIterator,
class OutputIterator1,
diff --git a/boost/compute/algorithm/partition_point.hpp b/boost/compute/algorithm/partition_point.hpp
index 3cc2bc0ca6..748824512d 100644
--- a/boost/compute/algorithm/partition_point.hpp
+++ b/boost/compute/algorithm/partition_point.hpp
@@ -29,6 +29,8 @@ namespace compute {
/// \param predicate Unary predicate to be applied on each element
/// \param queue Queue on which to execute
///
+/// Space complexity: \Omega(1)
+///
/// \see partition() and stable_partition()
///
template<class InputIterator, class UnaryPredicate>
diff --git a/boost/compute/algorithm/prev_permutation.hpp b/boost/compute/algorithm/prev_permutation.hpp
index 03c01bf8f4..ea20835caa 100644
--- a/boost/compute/algorithm/prev_permutation.hpp
+++ b/boost/compute/algorithm/prev_permutation.hpp
@@ -131,6 +131,7 @@ inline InputIterator pp_floor(InputIterator first,
/// \param last Iterator pointing to end of range
/// \param queue Queue on which to execute
///
+/// Space complexity: \Omega(1)
template<class InputIterator>
inline bool prev_permutation(InputIterator first,
InputIterator last,
diff --git a/boost/compute/algorithm/random_shuffle.hpp b/boost/compute/algorithm/random_shuffle.hpp
index 7d2d46a133..8e020830a5 100644
--- a/boost/compute/algorithm/random_shuffle.hpp
+++ b/boost/compute/algorithm/random_shuffle.hpp
@@ -28,6 +28,8 @@ namespace compute {
/// Randomly shuffles the elements in the range [\p first, \p last).
///
+/// Space complexity: \Omega(2n)
+///
/// \see scatter()
template<class Iterator>
inline void random_shuffle(Iterator first,
diff --git a/boost/compute/algorithm/reduce.hpp b/boost/compute/algorithm/reduce.hpp
index 19d070019f..e71d90fe24 100644
--- a/boost/compute/algorithm/reduce.hpp
+++ b/boost/compute/algorithm/reduce.hpp
@@ -153,6 +153,7 @@ block_reduce(InputIterator first,
return result_vector;
}
+// Space complexity: O( ceil(n / 2 / 256) )
template<class InputIterator, class OutputIterator, class BinaryFunction>
inline void generic_reduce(InputIterator first,
InputIterator last,
@@ -264,6 +265,9 @@ inline void dispatch_reduce(InputIterator first,
/// efficient on parallel hardware. For more information, see the documentation
/// on the \c accumulate() algorithm.
///
+/// Space complexity on GPUs: \Omega(n)<br>
+/// Space complexity on CPUs: \Omega(1)
+///
/// \see accumulate()
template<class InputIterator, class OutputIterator, class BinaryFunction>
inline void reduce(InputIterator first,
diff --git a/boost/compute/algorithm/reduce_by_key.hpp b/boost/compute/algorithm/reduce_by_key.hpp
index 87c73e887f..1a233c7dd4 100644
--- a/boost/compute/algorithm/reduce_by_key.hpp
+++ b/boost/compute/algorithm/reduce_by_key.hpp
@@ -51,6 +51,9 @@ namespace compute {
///
/// \snippet test/test_reduce_by_key.cpp reduce_by_key_int
///
+/// Space complexity on GPUs: \Omega(2n)<br>
+/// Space complexity on CPUs: \Omega(1)
+///
/// \see reduce()
template<class InputKeyIterator, class InputValueIterator,
class OutputKeyIterator, class OutputValueIterator,
diff --git a/boost/compute/algorithm/remove.hpp b/boost/compute/algorithm/remove.hpp
index 98feb1f9d8..086ba8cc7f 100644
--- a/boost/compute/algorithm/remove.hpp
+++ b/boost/compute/algorithm/remove.hpp
@@ -22,6 +22,8 @@ namespace compute {
/// Removes each element equal to \p value in the range [\p first,
/// \p last).
///
+/// Space complexity: \Omega(3n)
+///
/// \see remove_if()
template<class Iterator, class T>
inline Iterator remove(Iterator first,
diff --git a/boost/compute/algorithm/remove_if.hpp b/boost/compute/algorithm/remove_if.hpp
index 5e416bef88..9aece18bbd 100644
--- a/boost/compute/algorithm/remove_if.hpp
+++ b/boost/compute/algorithm/remove_if.hpp
@@ -22,6 +22,8 @@ namespace compute {
/// Removes each element for which \p predicate returns \c true in the
/// range [\p first, \p last).
///
+/// Space complexity: \Omega(3n)
+///
/// \see remove()
template<class Iterator, class Predicate>
inline Iterator remove_if(Iterator first,
diff --git a/boost/compute/algorithm/replace.hpp b/boost/compute/algorithm/replace.hpp
index fd649a2fad..336c1d3e0f 100644
--- a/boost/compute/algorithm/replace.hpp
+++ b/boost/compute/algorithm/replace.hpp
@@ -68,6 +68,8 @@ private:
/// Replaces each instance of \p old_value in the range [\p first,
/// \p last) with \p new_value.
+///
+/// Space complexity: \Omega(1)
template<class Iterator, class T>
inline void replace(Iterator first,
Iterator last,
diff --git a/boost/compute/algorithm/replace_copy.hpp b/boost/compute/algorithm/replace_copy.hpp
index 7224bd3ae6..34f61b514f 100644
--- a/boost/compute/algorithm/replace_copy.hpp
+++ b/boost/compute/algorithm/replace_copy.hpp
@@ -25,6 +25,8 @@ namespace compute {
/// beginning at \p result while replacing each instance of \p old_value
/// with \p new_value.
///
+/// Space complexity: \Omega(1)
+///
/// \see replace()
template<class InputIterator, class OutputIterator, class T>
inline OutputIterator
diff --git a/boost/compute/algorithm/reverse.hpp b/boost/compute/algorithm/reverse.hpp
index b6a9e8098c..15fe5533ac 100644
--- a/boost/compute/algorithm/reverse.hpp
+++ b/boost/compute/algorithm/reverse.hpp
@@ -52,6 +52,8 @@ struct reverse_kernel : public meta_kernel
/// Reverses the elements in the range [\p first, \p last).
///
+/// Space complexity: \Omega(1)
+///
/// \see reverse_copy()
template<class Iterator>
inline void reverse(Iterator first,
diff --git a/boost/compute/algorithm/reverse_copy.hpp b/boost/compute/algorithm/reverse_copy.hpp
index c839f44651..9fda9d4e27 100644
--- a/boost/compute/algorithm/reverse_copy.hpp
+++ b/boost/compute/algorithm/reverse_copy.hpp
@@ -51,6 +51,8 @@ struct reverse_copy_kernel : public meta_kernel
/// Copies the elements in the range [\p first, \p last) in reversed
/// order to the range beginning at \p result.
///
+/// Space complexity: \Omega(1)
+///
/// \see reverse()
template<class InputIterator, class OutputIterator>
inline OutputIterator
diff --git a/boost/compute/algorithm/rotate.hpp b/boost/compute/algorithm/rotate.hpp
index 54cb073cc2..715699340d 100644
--- a/boost/compute/algorithm/rotate.hpp
+++ b/boost/compute/algorithm/rotate.hpp
@@ -21,6 +21,8 @@ namespace compute {
/// Performs left rotation such that element at \p n_first comes to the
/// beginning.
///
+/// Space complexity: \Omega(distance(\p first, \p last))
+///
/// \see rotate_copy()
template<class InputIterator>
inline void rotate(InputIterator first,
diff --git a/boost/compute/algorithm/rotate_copy.hpp b/boost/compute/algorithm/rotate_copy.hpp
index fa1b44c5e5..679b3c998b 100644
--- a/boost/compute/algorithm/rotate_copy.hpp
+++ b/boost/compute/algorithm/rotate_copy.hpp
@@ -20,6 +20,8 @@ namespace compute {
/// Performs left rotation such that element at n_first comes to the
/// beginning and the output is stored in range starting at result.
///
+/// Space complexity: \Omega(1)
+///
/// \see rotate()
template<class InputIterator, class OutputIterator>
inline void rotate_copy(InputIterator first,
diff --git a/boost/compute/algorithm/scatter.hpp b/boost/compute/algorithm/scatter.hpp
index bea4201628..8ae5a99443 100644
--- a/boost/compute/algorithm/scatter.hpp
+++ b/boost/compute/algorithm/scatter.hpp
@@ -79,6 +79,8 @@ private:
/// beginning at \p result using the output indices from the range beginning
/// at \p map.
///
+/// Space complexity: \Omega(1)
+///
/// \see gather()
template<class InputIterator, class MapIterator, class OutputIterator>
inline void scatter(InputIterator first,
diff --git a/boost/compute/algorithm/scatter_if.hpp b/boost/compute/algorithm/scatter_if.hpp
index 159edd8c86..c7db51d3be 100644
--- a/boost/compute/algorithm/scatter_if.hpp
+++ b/boost/compute/algorithm/scatter_if.hpp
@@ -83,7 +83,7 @@ private:
/// at \p map if stencil is resolved to true. By default the predicate is
/// the identity function.
///
-///
+/// Space complexity: \Omega(1)
template<class InputIterator, class MapIterator, class StencilIterator, class OutputIterator,
class Predicate>
inline void scatter_if(InputIterator first,
diff --git a/boost/compute/algorithm/search.hpp b/boost/compute/algorithm/search.hpp
index 3d3d035b3c..a1f3dece62 100644
--- a/boost/compute/algorithm/search.hpp
+++ b/boost/compute/algorithm/search.hpp
@@ -34,6 +34,7 @@ namespace compute {
/// \param p_last Iterator pointing to end of pattern
/// \param queue Queue on which to execute
///
+/// Space complexity: \Omega(distance(\p t_first, \p t_last))
template<class TextIterator, class PatternIterator>
inline TextIterator search(TextIterator t_first,
TextIterator t_last,
diff --git a/boost/compute/algorithm/search_n.hpp b/boost/compute/algorithm/search_n.hpp
index 9e03111bb0..86ff64dfd9 100644
--- a/boost/compute/algorithm/search_n.hpp
+++ b/boost/compute/algorithm/search_n.hpp
@@ -102,6 +102,7 @@ private:
/// \param value Value which repeats
/// \param queue Queue on which to execute
///
+/// Space complexity: \Omega(distance(\p t_first, \p t_last))
template<class TextIterator, class ValueType>
inline TextIterator search_n(TextIterator t_first,
TextIterator t_last,
diff --git a/boost/compute/algorithm/set_difference.hpp b/boost/compute/algorithm/set_difference.hpp
index 17ce7bd3f6..85a846ba13 100644
--- a/boost/compute/algorithm/set_difference.hpp
+++ b/boost/compute/algorithm/set_difference.hpp
@@ -122,6 +122,8 @@ private:
/// will be stored
/// \param queue Queue on which to execute
///
+/// Space complexity:
+/// \Omega(2(distance(\p first1, \p last1) + distance(\p first2, \p last2)))
template<class InputIterator1, class InputIterator2, class OutputIterator>
inline OutputIterator set_difference(InputIterator1 first1,
InputIterator1 last1,
diff --git a/boost/compute/algorithm/set_intersection.hpp b/boost/compute/algorithm/set_intersection.hpp
index 50f291e84a..74d46f57c6 100644
--- a/boost/compute/algorithm/set_intersection.hpp
+++ b/boost/compute/algorithm/set_intersection.hpp
@@ -110,6 +110,8 @@ private:
/// will be stored
/// \param queue Queue on which to execute
///
+/// Space complexity:
+/// \Omega(2(distance(\p first1, \p last1) + distance(\p first2, \p last2)))
template<class InputIterator1, class InputIterator2, class OutputIterator>
inline OutputIterator set_intersection(InputIterator1 first1,
InputIterator1 last1,
diff --git a/boost/compute/algorithm/set_symmetric_difference.hpp b/boost/compute/algorithm/set_symmetric_difference.hpp
index 6e60b38511..34d280daa3 100644
--- a/boost/compute/algorithm/set_symmetric_difference.hpp
+++ b/boost/compute/algorithm/set_symmetric_difference.hpp
@@ -133,13 +133,16 @@ private:
/// difference will be stored
/// \param queue Queue on which to execute
///
+/// Space complexity:
+/// \Omega(2(distance(\p first1, \p last1) + distance(\p first2, \p last2)))
template<class InputIterator1, class InputIterator2, class OutputIterator>
-inline OutputIterator set_symmetric_difference(InputIterator1 first1,
- InputIterator1 last1,
- InputIterator2 first2,
- InputIterator2 last2,
- OutputIterator result,
- command_queue &queue = system::default_queue())
+inline OutputIterator
+set_symmetric_difference(InputIterator1 first1,
+ InputIterator1 last1,
+ InputIterator2 first2,
+ InputIterator2 last2,
+ OutputIterator result,
+ command_queue &queue = system::default_queue())
{
typedef typename std::iterator_traits<InputIterator1>::value_type value_type;
diff --git a/boost/compute/algorithm/set_union.hpp b/boost/compute/algorithm/set_union.hpp
index c61f7b29b3..6b405a0905 100644
--- a/boost/compute/algorithm/set_union.hpp
+++ b/boost/compute/algorithm/set_union.hpp
@@ -135,6 +135,8 @@ private:
/// will be stored
/// \param queue Queue on which to execute
///
+/// Space complexity:
+/// \Omega(2(distance(\p first1, \p last1) + distance(\p first2, \p last2)))
template<class InputIterator1, class InputIterator2, class OutputIterator>
inline OutputIterator set_union(InputIterator1 first1,
InputIterator1 last1,
diff --git a/boost/compute/algorithm/sort.hpp b/boost/compute/algorithm/sort.hpp
index 7e0a583e3e..b8fa90f335 100644
--- a/boost/compute/algorithm/sort.hpp
+++ b/boost/compute/algorithm/sort.hpp
@@ -176,6 +176,8 @@ inline void dispatch_sort(Iterator first,
/// boost::compute::sort(data.begin(), data.end(), queue);
/// \endcode
///
+/// Space complexity: \Omega(n)
+///
/// \see is_sorted()
template<class Iterator, class Compare>
inline void sort(Iterator first,
diff --git a/boost/compute/algorithm/sort_by_key.hpp b/boost/compute/algorithm/sort_by_key.hpp
index c39bcf9890..fdd2d1c481 100644
--- a/boost/compute/algorithm/sort_by_key.hpp
+++ b/boost/compute/algorithm/sort_by_key.hpp
@@ -128,6 +128,8 @@ inline void dispatch_sort_by_key(KeyIterator keys_first,
///
/// If no compare function is specified, \c less is used.
///
+/// Space complexity: \Omega(2n)
+///
/// \see sort()
template<class KeyIterator, class ValueIterator, class Compare>
inline void sort_by_key(KeyIterator keys_first,
diff --git a/boost/compute/algorithm/stable_partition.hpp b/boost/compute/algorithm/stable_partition.hpp
index 283b068283..2b07f034b9 100644
--- a/boost/compute/algorithm/stable_partition.hpp
+++ b/boost/compute/algorithm/stable_partition.hpp
@@ -33,6 +33,8 @@ namespace compute {
/// \param predicate Unary predicate to be applied on each element
/// \param queue Queue on which to execute
///
+/// Space complexity: \Omega(3n)
+///
/// \see is_partitioned() and partition()
///
template<class Iterator, class UnaryPredicate>
diff --git a/boost/compute/algorithm/stable_sort.hpp b/boost/compute/algorithm/stable_sort.hpp
index 381fc81bc0..0857d75dc9 100644
--- a/boost/compute/algorithm/stable_sort.hpp
+++ b/boost/compute/algorithm/stable_sort.hpp
@@ -72,6 +72,8 @@ dispatch_gpu_stable_sort(buffer_iterator<T> first,
/// Sorts the values in the range [\p first, \p last) according to
/// \p compare. The relative order of identical values is preserved.
///
+/// Space complexity: \Omega(n)
+///
/// \see sort(), is_sorted()
template<class Iterator, class Compare>
inline void stable_sort(Iterator first,
diff --git a/boost/compute/algorithm/stable_sort_by_key.hpp b/boost/compute/algorithm/stable_sort_by_key.hpp
index 878f999f44..ce8811ef19 100644
--- a/boost/compute/algorithm/stable_sort_by_key.hpp
+++ b/boost/compute/algorithm/stable_sort_by_key.hpp
@@ -126,6 +126,8 @@ inline void dispatch_ssort_by_key(KeyIterator keys_first,
///
/// If no compare function is specified, \c less is used.
///
+/// Space complexity: \Omega(2n)
+///
/// \see sort()
template<class KeyIterator, class ValueIterator, class Compare>
inline void stable_sort_by_key(KeyIterator keys_first,
diff --git a/boost/compute/algorithm/swap_ranges.hpp b/boost/compute/algorithm/swap_ranges.hpp
index 6ff3e14f6a..a706df7a61 100644
--- a/boost/compute/algorithm/swap_ranges.hpp
+++ b/boost/compute/algorithm/swap_ranges.hpp
@@ -21,6 +21,8 @@ namespace compute {
/// Swaps the elements in the range [\p first1, \p last1) with the
/// elements in the range beginning at \p first2.
+///
+/// Space complexity: \Omega(distance(\p first1, \p last1))
template<class Iterator1, class Iterator2>
inline Iterator2 swap_ranges(Iterator1 first1,
Iterator1 last1,
diff --git a/boost/compute/algorithm/transform.hpp b/boost/compute/algorithm/transform.hpp
index 68750a6523..9137604d55 100644
--- a/boost/compute/algorithm/transform.hpp
+++ b/boost/compute/algorithm/transform.hpp
@@ -29,6 +29,8 @@ namespace compute {
///
/// \snippet test/test_transform.cpp transform_abs
///
+/// Space complexity: \Omega(1)
+///
/// \see copy()
template<class InputIterator, class OutputIterator, class UnaryOperator>
inline OutputIterator transform(InputIterator first,
diff --git a/boost/compute/algorithm/transform_if.hpp b/boost/compute/algorithm/transform_if.hpp
index 0eb0fd434e..9a98102d27 100644
--- a/boost/compute/algorithm/transform_if.hpp
+++ b/boost/compute/algorithm/transform_if.hpp
@@ -26,6 +26,7 @@ namespace boost {
namespace compute {
namespace detail {
+// Space complexity: O(2n)
template<class InputIterator, class OutputIterator, class UnaryFunction, class Predicate>
inline OutputIterator transform_if_impl(InputIterator first,
InputIterator last,
@@ -53,14 +54,12 @@ inline OutputIterator transform_if_impl(InputIterator first,
<< predicate(first[k1.get_global_id(0)]) << " ? 1 : 0;\n";
k1.exec_1d(queue, 0, count);
- // count number of elements to be copied
- size_t copied_element_count =
- ::boost::compute::count(indices.begin(), indices.end(), 1, queue);
-
// scan indices
+ size_t copied_element_count = (indices.cend() - 1).read(queue);
::boost::compute::exclusive_scan(
indices.begin(), indices.end(), indices.begin(), queue
);
+ copied_element_count += (indices.cend() - 1).read(queue); // last scan element plus last mask element
// copy values
::boost::compute::detail::meta_kernel k2("transform_if_do_copy");
@@ -98,6 +97,8 @@ inline discard_iterator transform_if_impl(InputIterator first,
/// Copies each element in the range [\p first, \p last) for which
/// \p predicate returns \c true to the range beginning at \p result.
+///
+/// Space complexity: O(2n)
template<class InputIterator, class OutputIterator, class UnaryFunction, class Predicate>
inline OutputIterator transform_if(InputIterator first,
InputIterator last,
diff --git a/boost/compute/algorithm/transform_reduce.hpp b/boost/compute/algorithm/transform_reduce.hpp
index fbeee5a691..a59a76aefd 100644
--- a/boost/compute/algorithm/transform_reduce.hpp
+++ b/boost/compute/algorithm/transform_reduce.hpp
@@ -30,6 +30,9 @@ namespace compute {
///
/// \snippet test/test_transform_reduce.cpp sum_abs_int
///
+/// Space complexity on GPUs: \Omega(n)<br>
+/// Space complexity on CPUs: \Omega(1)
+///
/// \see reduce(), inner_product()
template<class InputIterator,
class OutputIterator,
diff --git a/boost/compute/algorithm/unique.hpp b/boost/compute/algorithm/unique.hpp
index faa36bad9d..8b7e2a0d0d 100644
--- a/boost/compute/algorithm/unique.hpp
+++ b/boost/compute/algorithm/unique.hpp
@@ -31,6 +31,8 @@ namespace compute {
///
/// \return \c InputIterator to the new logical end of the range
///
+/// Space complexity: \Omega(4n)
+///
/// \see unique_copy()
template<class InputIterator, class BinaryPredicate>
inline InputIterator unique(InputIterator first,
diff --git a/boost/compute/algorithm/unique_copy.hpp b/boost/compute/algorithm/unique_copy.hpp
index 2ce60a9359..d5fffd4ff9 100644
--- a/boost/compute/algorithm/unique_copy.hpp
+++ b/boost/compute/algorithm/unique_copy.hpp
@@ -127,6 +127,8 @@ inline OutputIterator unique_copy(InputIterator first,
///
/// \return \c OutputIterator to the end of the result range
///
+/// Space complexity: \Omega(4n)
+///
/// \see unique()
template<class InputIterator, class OutputIterator, class BinaryPredicate>
inline OutputIterator unique_copy(InputIterator first,
diff --git a/boost/compute/algorithm/upper_bound.hpp b/boost/compute/algorithm/upper_bound.hpp
index a5a82d301c..f592c79b9a 100644
--- a/boost/compute/algorithm/upper_bound.hpp
+++ b/boost/compute/algorithm/upper_bound.hpp
@@ -22,6 +22,8 @@ namespace compute {
/// Returns an iterator pointing to the first element in the sorted
/// range [\p first, \p last) that is greater than
/// \p value.
+///
+/// Space complexity: \Omega(1)
template<class InputIterator, class T>
inline InputIterator
upper_bound(InputIterator first,
diff --git a/boost/compute/buffer.hpp b/boost/compute/buffer.hpp
index b5a48806d5..128403cd62 100644
--- a/boost/compute/buffer.hpp
+++ b/boost/compute/buffer.hpp
@@ -154,7 +154,7 @@ public:
/// \p queue to perform the copy.
buffer clone(command_queue &queue) const;
- #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+ #if defined(BOOST_COMPUTE_CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
/// Creates a new buffer out of this buffer.
/// The new buffer is a sub region of this buffer.
/// \p flags The mem_flags which should be used to create the new buffer
@@ -187,7 +187,7 @@ public:
return buffer(mem, false);
}
- #endif // CL_VERSION_1_1
+ #endif // BOOST_COMPUTE_CL_VERSION_1_1
};
/// \internal_ define get_info() specializations for buffer
@@ -201,12 +201,12 @@ BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(buffer,
((cl_context, CL_MEM_CONTEXT))
)
-#ifdef CL_VERSION_1_1
+#ifdef BOOST_COMPUTE_CL_VERSION_1_1
BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(buffer,
((cl_mem, CL_MEM_ASSOCIATED_MEMOBJECT))
((size_t, CL_MEM_OFFSET))
)
-#endif // CL_VERSION_1_1
+#endif // BOOST_COMPUTE_CL_VERSION_1_1
namespace detail {
diff --git a/boost/compute/cl.hpp b/boost/compute/cl.hpp
index c439d8dfdc..fe25ffde56 100644
--- a/boost/compute/cl.hpp
+++ b/boost/compute/cl.hpp
@@ -11,10 +11,58 @@
#ifndef BOOST_COMPUTE_CL_HPP
#define BOOST_COMPUTE_CL_HPP
+#if defined(BOOST_COMPUTE_MAX_CL_VERSION)
+# if !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS) && BOOST_COMPUTE_MAX_CL_VERSION < 202
+# define CL_USE_DEPRECATED_OPENCL_2_1_APIS
+# endif
+# if !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS) && BOOST_COMPUTE_MAX_CL_VERSION < 201
+# define CL_USE_DEPRECATED_OPENCL_2_0_APIS
+# endif
+# if !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) && BOOST_COMPUTE_MAX_CL_VERSION < 200
+# define CL_USE_DEPRECATED_OPENCL_1_2_APIS
+# endif
+# if !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) && BOOST_COMPUTE_MAX_CL_VERSION < 102
+# define CL_USE_DEPRECATED_OPENCL_1_1_APIS
+# endif
+# if !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS) && BOOST_COMPUTE_MAX_CL_VERSION < 101
+# define CL_USE_DEPRECATED_OPENCL_1_0_APIS
+# endif
+#endif
+
#if defined(__APPLE__)
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
+// select what OpenCL core API versions to use
+#if defined(CL_VERSION_1_0)
+# define BOOST_COMPUTE_CL_VERSION_1_0
+#endif
+#if defined(CL_VERSION_1_1)
+# if !defined(BOOST_COMPUTE_MAX_CL_VERSION) || BOOST_COMPUTE_MAX_CL_VERSION >= 101
+# define BOOST_COMPUTE_CL_VERSION_1_1
+# endif
+#endif
+#if defined(CL_VERSION_1_2)
+# if !defined(BOOST_COMPUTE_MAX_CL_VERSION) || BOOST_COMPUTE_MAX_CL_VERSION >= 102
+# define BOOST_COMPUTE_CL_VERSION_1_2
+# endif
+#endif
+#if defined(CL_VERSION_2_0)
+# if !defined(BOOST_COMPUTE_MAX_CL_VERSION) || BOOST_COMPUTE_MAX_CL_VERSION >= 200
+# define BOOST_COMPUTE_CL_VERSION_2_0
+# endif
+#endif
+#if defined(CL_VERSION_2_1)
+# if !defined(BOOST_COMPUTE_MAX_CL_VERSION) || BOOST_COMPUTE_MAX_CL_VERSION >= 201
+# define BOOST_COMPUTE_CL_VERSION_2_1
+# endif
+#endif
+#if defined(CL_VERSION_2_2)
+# if !defined(BOOST_COMPUTE_MAX_CL_VERSION) || BOOST_COMPUTE_MAX_CL_VERSION >= 202
+# define BOOST_COMPUTE_CL_VERSION_2_2
+# endif
+#endif
+
#endif // BOOST_COMPUTE_CL_HPP
diff --git a/boost/compute/command_queue.hpp b/boost/compute/command_queue.hpp
index 2a1328a959..d9e81e0bd4 100644
--- a/boost/compute/command_queue.hpp
+++ b/boost/compute/command_queue.hpp
@@ -81,12 +81,17 @@ public:
enum properties {
enable_profiling = CL_QUEUE_PROFILING_ENABLE,
enable_out_of_order_execution = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
+ #ifdef BOOST_COMPUTE_CL_VERSION_2_0
+ ,
+ on_device = CL_QUEUE_ON_DEVICE,
+ on_device_default = CL_QUEUE_ON_DEVICE_DEFAULT
+ #endif
};
enum map_flags {
map_read = CL_MAP_READ,
map_write = CL_MAP_WRITE
- #ifdef CL_VERSION_1_2
+ #ifdef BOOST_COMPUTE_CL_VERSION_1_2
,
map_write_invalidate_region = CL_MAP_WRITE_INVALIDATE_REGION
#endif
@@ -118,7 +123,7 @@ public:
cl_int error = 0;
- #ifdef CL_VERSION_2_0
+ #ifdef BOOST_COMPUTE_CL_VERSION_2_0
if (device.check_version(2, 0)){
std::vector<cl_queue_properties> queue_properties;
if(properties){
@@ -323,7 +328,7 @@ public:
return event_;
}
- #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+ #if defined(BOOST_COMPUTE_CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
/// Enqueues a command to read a rectangular region from \p buffer to
/// host memory.
///
@@ -417,7 +422,7 @@ public:
return event_;
}
- #endif // CL_VERSION_1_1
+ #endif // BOOST_COMPUTE_CL_VERSION_1_1
/// Enqueues a command to write data from host memory to \p buffer.
///
@@ -494,7 +499,7 @@ public:
return event_;
}
- #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+ #if defined(BOOST_COMPUTE_CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
/// Enqueues a command to write a rectangular region from host memory
/// to \p buffer.
///
@@ -588,7 +593,7 @@ public:
return event_;
}
- #endif // CL_VERSION_1_1
+ #endif // BOOST_COMPUTE_CL_VERSION_1_1
/// Enqueues a command to copy data from \p src_buffer to
/// \p dst_buffer.
@@ -630,7 +635,7 @@ public:
return event_;
}
- #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+ #if defined(BOOST_COMPUTE_CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
/// Enqueues a command to copy a rectangular region from
/// \p src_buffer to \p dst_buffer.
///
@@ -676,9 +681,9 @@ public:
return event_;
}
- #endif // CL_VERSION_1_1
+ #endif // BOOST_COMPUTE_CL_VERSION_1_1
- #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+ #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
/// Enqueues a command to fill \p buffer with \p pattern.
///
/// \see_opencl_ref{clEnqueueFillBuffer}
@@ -717,7 +722,7 @@ public:
return event_;
}
- #endif // CL_VERSION_1_2
+ #endif // BOOST_COMPUTE_CL_VERSION_1_2
/// Enqueues a command to map \p buffer into the host address space.
/// Event associated with map operation is returned through
@@ -1269,7 +1274,7 @@ public:
return event_;
}
- #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+ #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
/// Enqueues a command to fill \p image with \p fill_color.
///
/// \see_opencl_ref{clEnqueueFillImage}
@@ -1354,7 +1359,7 @@ public:
return event_;
}
- #endif // CL_VERSION_1_2
+ #endif // BOOST_COMPUTE_CL_VERSION_1_2
/// Enqueues a kernel for execution.
///
@@ -1438,7 +1443,7 @@ public:
// clEnqueueTask() was deprecated in OpenCL 2.0. In that case we
// just forward to the equivalent clEnqueueNDRangeKernel() call.
- #ifdef CL_VERSION_2_0
+ #ifdef BOOST_COMPUTE_CL_VERSION_2_0
size_t one = 1;
cl_int ret = clEnqueueNDRangeKernel(
m_queue, kernel, 1, 0, &one, &one,
@@ -1511,7 +1516,10 @@ public:
{
BOOST_ASSERT(m_queue != 0);
- clFlush(m_queue);
+ cl_int ret = clFlush(m_queue);
+ if(ret != CL_SUCCESS){
+ BOOST_THROW_EXCEPTION(opencl_error(ret));
+ }
}
/// Blocks until all outstanding commands in the queue have finished.
@@ -1521,7 +1529,10 @@ public:
{
BOOST_ASSERT(m_queue != 0);
- clFinish(m_queue);
+ cl_int ret = clFinish(m_queue);
+ if(ret != CL_SUCCESS){
+ BOOST_THROW_EXCEPTION(opencl_error(ret));
+ }
}
/// Enqueues a barrier in the queue.
@@ -1530,11 +1541,11 @@ public:
BOOST_ASSERT(m_queue != 0);
cl_int ret = CL_SUCCESS;
- #ifdef CL_VERSION_1_2
+ #ifdef BOOST_COMPUTE_CL_VERSION_1_2
if(get_device().check_version(1, 2)){
ret = clEnqueueBarrierWithWaitList(m_queue, 0, 0, 0);
} else
- #endif // CL_VERSION_1_2
+ #endif // BOOST_COMPUTE_CL_VERSION_1_2
{
// Suppress deprecated declarations warning
BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS();
@@ -1547,7 +1558,7 @@ public:
}
}
- #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+ #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
/// Enqueues a barrier in the queue after \p events.
///
/// \opencl_version_warning{1,2}
@@ -1568,7 +1579,7 @@ public:
return event_;
}
- #endif // CL_VERSION_1_2
+ #endif // BOOST_COMPUTE_CL_VERSION_1_2
/// Enqueues a marker in the queue and returns an event that can be
/// used to track its progress.
@@ -1577,7 +1588,7 @@ public:
event event_;
cl_int ret = CL_SUCCESS;
- #ifdef CL_VERSION_1_2
+ #ifdef BOOST_COMPUTE_CL_VERSION_1_2
if(get_device().check_version(1, 2)){
ret = clEnqueueMarkerWithWaitList(m_queue, 0, 0, &event_.get());
} else
@@ -1596,7 +1607,7 @@ public:
return event_;
}
- #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+ #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
/// Enqueues a marker after \p events in the queue and returns an
/// event that can be used to track its progress.
///
@@ -1615,9 +1626,9 @@ public:
return event_;
}
- #endif // CL_VERSION_1_2
+ #endif // BOOST_COMPUTE_CL_VERSION_1_2
- #if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+ #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
/// Enqueues a command to copy \p size bytes of data from \p src_ptr to
/// \p dst_ptr.
///
@@ -1797,7 +1808,7 @@ public:
return event_;
}
- #endif // CL_VERSION_2_0
+ #endif // BOOST_COMPUTE_CL_VERSION_2_0
/// Returns \c true if the command queue is the same at \p other.
bool operator==(const command_queue &other) const
diff --git a/boost/compute/container/detail/scalar.hpp b/boost/compute/container/detail/scalar.hpp
index 7ecd86e540..4dee76d2b4 100644
--- a/boost/compute/container/detail/scalar.hpp
+++ b/boost/compute/container/detail/scalar.hpp
@@ -12,6 +12,7 @@
#define BOOST_COMPUTE_CONTAINER_DETAIL_SCALAR_HPP
#include <boost/compute/buffer.hpp>
+#include <boost/compute/event.hpp>
#include <boost/compute/detail/read_write_single_value.hpp>
namespace boost {
@@ -40,9 +41,9 @@ public:
return read_single_value<T>(m_buffer, 0, queue);
}
- void write(const T &value, command_queue &queue)
+ event write(const T &value, command_queue &queue)
{
- write_single_value<T>(value, m_buffer, 0, queue);
+ return write_single_value<T>(value, m_buffer, 0, queue);
}
const buffer& get_buffer() const
diff --git a/boost/compute/container/valarray.hpp b/boost/compute/container/valarray.hpp
index 8ac8e01753..981ec2e545 100644
--- a/boost/compute/container/valarray.hpp
+++ b/boost/compute/container/valarray.hpp
@@ -67,6 +67,7 @@ public:
valarray(const valarray<T> &other)
: m_buffer(other.m_buffer.get_context(), other.size() * sizeof(T))
{
+ copy(other.begin(), other.end(), begin());
}
valarray(const std::valarray<T> &valarray,
diff --git a/boost/compute/context.hpp b/boost/compute/context.hpp
index 61e84e9767..13154a968a 100644
--- a/boost/compute/context.hpp
+++ b/boost/compute/context.hpp
@@ -233,11 +233,11 @@ BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(context,
((std::vector<cl_context_properties>, CL_CONTEXT_PROPERTIES))
)
-#ifdef CL_VERSION_1_1
+#ifdef BOOST_COMPUTE_CL_VERSION_1_1
BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(context,
((cl_uint, CL_CONTEXT_NUM_DEVICES))
)
-#endif // CL_VERSION_1_1
+#endif // BOOST_COMPUTE_CL_VERSION_1_1
} // end compute namespace
} // end boost namespace
diff --git a/boost/compute/detail/buffer_value.hpp b/boost/compute/detail/buffer_value.hpp
index 6a4e78fc19..478fc03252 100644
--- a/boost/compute/detail/buffer_value.hpp
+++ b/boost/compute/detail/buffer_value.hpp
@@ -124,7 +124,9 @@ public:
const context &context = m_buffer.get_context();
command_queue queue(context, context.get_device());
- detail::write_single_value<T>(value, m_buffer, m_index / sizeof(T), queue);
+ detail::write_single_value<T>(
+ value, m_buffer, m_index / sizeof(T), queue
+ ).wait();
return *this;
}
diff --git a/boost/compute/detail/duration.hpp b/boost/compute/detail/duration.hpp
index 601f12d291..98e825fb3c 100644
--- a/boost/compute/detail/duration.hpp
+++ b/boost/compute/detail/duration.hpp
@@ -17,7 +17,9 @@
#include <chrono>
#endif
+#ifndef BOOST_COMPUTE_NO_BOOST_CHRONO
#include <boost/chrono/duration.hpp>
+#endif
namespace boost {
namespace compute {
@@ -34,6 +36,7 @@ make_duration_from_nanoseconds(std::chrono::duration<Rep, Period>, size_t nanose
}
#endif // BOOST_COMPUTE_NO_HDR_CHRONO
+#ifndef BOOST_COMPUTE_NO_BOOST_CHRONO
template<class Rep, class Period>
inline boost::chrono::duration<Rep, Period>
make_duration_from_nanoseconds(boost::chrono::duration<Rep, Period>, size_t nanoseconds)
@@ -42,6 +45,7 @@ make_duration_from_nanoseconds(boost::chrono::duration<Rep, Period>, size_t nano
boost::chrono::nanoseconds(nanoseconds)
);
}
+#endif // BOOST_COMPUTE_NO_BOOST_CHRONO
} // end detail namespace
} // end compute namespace
diff --git a/boost/compute/detail/meta_kernel.hpp b/boost/compute/detail/meta_kernel.hpp
index 5e6d6e0337..13af7cc437 100644
--- a/boost/compute/detail/meta_kernel.hpp
+++ b/boost/compute/detail/meta_kernel.hpp
@@ -1036,7 +1036,7 @@ inline meta_kernel& operator<<(meta_kernel &kernel,
}
// SVM requires OpenCL 2.0
-#if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+#if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
template<class T, class IndexExpr>
inline meta_kernel& operator<<(meta_kernel &kernel,
const svm_ptr_index_expr<T, IndexExpr> &expr)
@@ -1072,7 +1072,7 @@ inline meta_kernel& operator<<(meta_kernel &kernel,
BOOST_STATIC_ASSERT(N < 16);
if(N < 10){
- return kernel << expr.m_arg << ".s" << uint_(N);
+ return kernel << expr.m_arg << ".s" << int_(N);
}
else if(N < 16){
#ifdef _MSC_VER
diff --git a/boost/compute/detail/parameter_cache.hpp b/boost/compute/detail/parameter_cache.hpp
index 0a16cd9b0e..c609490c1e 100644
--- a/boost/compute/detail/parameter_cache.hpp
+++ b/boost/compute/detail/parameter_cache.hpp
@@ -24,6 +24,7 @@
#include <boost/compute/version.hpp>
#ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE
+#include <cstdio>
#include <boost/algorithm/string/trim.hpp>
#include <boost/compute/detail/path.hpp>
#include <boost/property_tree/ptree.hpp>
@@ -117,9 +118,16 @@ private:
static std::string version_string()
{
char buf[32];
- std::snprintf(buf, sizeof(buf), "%d.%d.%d", BOOST_COMPUTE_VERSION_MAJOR,
- BOOST_COMPUTE_VERSION_MINOR,
- BOOST_COMPUTE_VERSION_PATCH);
+ // snprintf is in Visual Studio since Visual Studio 2015 (_MSC_VER == 1900)
+ #if defined (_MSC_VER) && _MSC_VER < 1900
+ #define DETAIL_SNPRINTF sprintf_s
+ #else
+ #define DETAIL_SNPRINTF std::snprintf
+ #endif
+ DETAIL_SNPRINTF(buf, sizeof(buf), "%d.%d.%d", BOOST_COMPUTE_VERSION_MAJOR,
+ BOOST_COMPUTE_VERSION_MINOR,
+ BOOST_COMPUTE_VERSION_PATCH);
+ #undef DETAIL_SNPRINTF
return buf;
}
diff --git a/boost/compute/detail/path.hpp b/boost/compute/detail/path.hpp
index ec8760eaf9..d9c5afd182 100644
--- a/boost/compute/detail/path.hpp
+++ b/boost/compute/detail/path.hpp
@@ -30,7 +30,7 @@ static const std::string& path_delim()
// Path to appdata folder.
inline const std::string& appdata_path()
{
- #ifdef WIN32
+ #ifdef _WIN32
static const std::string appdata = detail::getenv("APPDATA")
+ path_delim() + "boost_compute";
#else
diff --git a/boost/compute/detail/read_write_single_value.hpp b/boost/compute/detail/read_write_single_value.hpp
index fde40d946c..3e613bc8c3 100644
--- a/boost/compute/detail/read_write_single_value.hpp
+++ b/boost/compute/detail/read_write_single_value.hpp
@@ -14,6 +14,7 @@
#include <boost/throw_exception.hpp>
#include <boost/compute/buffer.hpp>
+#include <boost/compute/event.hpp>
#include <boost/compute/exception.hpp>
#include <boost/compute/command_queue.hpp>
@@ -47,18 +48,18 @@ inline T read_single_value(const buffer &buffer, command_queue &queue)
// writes a single value at index to the buffer
template<class T>
-inline void write_single_value(const T &value,
- const buffer &buffer,
- size_t index,
- command_queue &queue)
+inline event write_single_value(const T &value,
+ const buffer &buffer,
+ size_t index,
+ command_queue &queue)
{
BOOST_ASSERT(index < buffer.size() / sizeof(T));
BOOST_ASSERT(buffer.get_context() == queue.get_context());
- queue.enqueue_write_buffer(buffer,
- index * sizeof(T),
- sizeof(T),
- &value);
+ return queue.enqueue_write_buffer(buffer,
+ index * sizeof(T),
+ sizeof(T),
+ &value);
}
// writes value to the first location in buffer
diff --git a/boost/compute/device.hpp b/boost/compute/device.hpp
index 5cf2e8c931..427d75d60b 100644
--- a/boost/compute/device.hpp
+++ b/boost/compute/device.hpp
@@ -62,7 +62,7 @@ public:
explicit device(cl_device_id id, bool retain = true)
: m_id(id)
{
- #ifdef CL_VERSION_1_2
+ #ifdef BOOST_COMPUTE_CL_VERSION_1_2
if(m_id && retain && is_subdevice()){
clRetainDevice(m_id);
}
@@ -75,7 +75,7 @@ public:
device(const device &other)
: m_id(other.m_id)
{
- #ifdef CL_VERSION_1_2
+ #ifdef BOOST_COMPUTE_CL_VERSION_1_2
if(m_id && is_subdevice()){
clRetainDevice(m_id);
}
@@ -86,7 +86,7 @@ public:
device& operator=(const device &other)
{
if(this != &other){
- #ifdef CL_VERSION_1_2
+ #ifdef BOOST_COMPUTE_CL_VERSION_1_2
if(m_id && is_subdevice()){
clReleaseDevice(m_id);
}
@@ -94,7 +94,7 @@ public:
m_id = other.m_id;
- #ifdef CL_VERSION_1_2
+ #ifdef BOOST_COMPUTE_CL_VERSION_1_2
if(m_id && is_subdevice()){
clRetainDevice(m_id);
}
@@ -115,7 +115,7 @@ public:
/// Move-assigns the device from \p other to \c *this.
device& operator=(device&& other) BOOST_NOEXCEPT
{
- #ifdef CL_VERSION_1_2
+ #ifdef BOOST_COMPUTE_CL_VERSION_1_2
if(m_id && is_subdevice()){
clReleaseDevice(m_id);
}
@@ -131,7 +131,7 @@ public:
/// Destroys the device object.
~device()
{
- #ifdef CL_VERSION_1_2
+ #ifdef BOOST_COMPUTE_CL_VERSION_1_2
if(m_id && is_subdevice()){
BOOST_COMPUTE_ASSERT_CL_SUCCESS(
clReleaseDevice(m_id)
@@ -282,7 +282,7 @@ public:
/// Returns \c true if the device is a sub-device.
bool is_subdevice() const
{
- #if defined(CL_VERSION_1_2)
+ #if defined(BOOST_COMPUTE_CL_VERSION_1_2)
try {
return get_info<cl_device_id>(CL_DEVICE_PARENT_DEVICE) != 0;
}
@@ -321,7 +321,7 @@ public:
typename detail::get_object_info_type<device, Enum>::type
get_info() const;
- #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+ #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
/// Partitions the device into multiple sub-devices according to
/// \p properties.
///
@@ -393,7 +393,7 @@ public:
return partition(properties);
}
- #endif // CL_VERSION_1_2
+ #endif // BOOST_COMPUTE_CL_VERSION_1_2
/// Returns \c true if the device is the same at \p other.
bool operator==(const device &other) const
@@ -528,7 +528,7 @@ BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device,
)
#endif
-#ifdef CL_VERSION_1_1
+#ifdef BOOST_COMPUTE_CL_VERSION_1_1
BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device,
((bool, CL_DEVICE_HOST_UNIFIED_MEMORY))
((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR))
@@ -539,9 +539,9 @@ BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device,
((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE))
((std::string, CL_DEVICE_OPENCL_C_VERSION))
)
-#endif // CL_VERSION_1_1
+#endif // BOOST_COMPUTE_CL_VERSION_1_1
-#ifdef CL_VERSION_1_2
+#ifdef BOOST_COMPUTE_CL_VERSION_1_2
BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device,
((std::string, CL_DEVICE_BUILT_IN_KERNELS))
((bool, CL_DEVICE_LINKER_AVAILABLE))
@@ -554,9 +554,9 @@ BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device,
((bool, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC))
((cl_uint, CL_DEVICE_REFERENCE_COUNT))
)
-#endif // CL_VERSION_1_2
+#endif // BOOST_COMPUTE_CL_VERSION_1_2
-#ifdef CL_VERSION_2_0
+#ifdef BOOST_COMPUTE_CL_VERSION_2_0
BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device,
((size_t, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE))
((size_t, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE))
@@ -576,7 +576,7 @@ BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device,
((cl_uint, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT))
((cl_uint, CL_DEVICE_IMAGE_PITCH_ALIGNMENT))
)
-#endif // CL_VERSION_2_0
+#endif // BOOST_COMPUTE_CL_VERSION_2_0
} // end compute namespace
} // end boost namespace
diff --git a/boost/compute/event.hpp b/boost/compute/event.hpp
index 2f53d87650..030c1bb3ba 100644
--- a/boost/compute/event.hpp
+++ b/boost/compute/event.hpp
@@ -74,7 +74,7 @@ public:
marker = CL_COMMAND_MARKER,
aquire_gl_objects = CL_COMMAND_ACQUIRE_GL_OBJECTS,
release_gl_object = CL_COMMAND_RELEASE_GL_OBJECTS
- #if defined(CL_VERSION_1_1)
+ #if defined(BOOST_COMPUTE_CL_VERSION_1_1)
,
read_buffer_rect = CL_COMMAND_READ_BUFFER_RECT,
write_buffer_rect = CL_COMMAND_WRITE_BUFFER_RECT,
@@ -218,7 +218,7 @@ public:
}
}
- #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+ #if defined(BOOST_COMPUTE_CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
/// Registers a function to be called when the event status changes to
/// \p status (by default CL_COMPLETE). The callback is passed the OpenCL
/// event object, the event status, and a pointer to arbitrary user data.
@@ -254,7 +254,7 @@ public:
new boost::function<void()>(callback)
);
}
- #endif // CL_VERSION_1_1
+ #endif // BOOST_COMPUTE_CL_VERSION_1_1
/// Returns the total duration of the event from \p start to \p end.
///
@@ -300,7 +300,7 @@ public:
}
private:
- #ifdef CL_VERSION_1_1
+ #ifdef BOOST_COMPUTE_CL_VERSION_1_1
/// \internal_
static void BOOST_COMPUTE_CL_CALLBACK
event_callback_invoker(cl_event, cl_int, void *user_data)
@@ -312,7 +312,7 @@ private:
delete callback;
}
- #endif // CL_VERSION_1_1
+ #endif // BOOST_COMPUTE_CL_VERSION_1_1
protected:
cl_event m_event;
@@ -326,7 +326,7 @@ BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(event,
((cl_uint, CL_EVENT_REFERENCE_COUNT))
)
-#ifdef CL_VERSION_1_1
+#ifdef BOOST_COMPUTE_CL_VERSION_1_1
BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(event,
((cl_context, CL_EVENT_CONTEXT))
)
diff --git a/boost/compute/exception/opencl_error.hpp b/boost/compute/exception/opencl_error.hpp
index 29a3a9d258..0f47b56f84 100644
--- a/boost/compute/exception/opencl_error.hpp
+++ b/boost/compute/exception/opencl_error.hpp
@@ -123,7 +123,7 @@ public:
case CL_INVALID_BUFFER_SIZE: return "Invalid Buffer Size";
case CL_INVALID_MIP_LEVEL: return "Invalid MIP Level";
case CL_INVALID_GLOBAL_WORK_SIZE: return "Invalid Global Work Size";
- #ifdef CL_VERSION_1_2
+ #ifdef BOOST_COMPUTE_CL_VERSION_1_2
case CL_COMPILE_PROGRAM_FAILURE: return "Compile Program Failure";
case CL_LINKER_NOT_AVAILABLE: return "Linker Not Available";
case CL_LINK_PROGRAM_FAILURE: return "Link Program Failure";
@@ -134,8 +134,8 @@ public:
case CL_INVALID_COMPILER_OPTIONS: return "Invalid Compiler Options";
case CL_INVALID_LINKER_OPTIONS: return "Invalid Linker Options";
case CL_INVALID_DEVICE_PARTITION_COUNT: return "Invalid Device Partition Count";
- #endif // CL_VERSION_1_2
- #ifdef CL_VERSION_2_0
+ #endif // BOOST_COMPUTE_CL_VERSION_1_2
+ #ifdef BOOST_COMPUTE_CL_VERSION_2_0
case CL_INVALID_PIPE_SIZE: return "Invalid Pipe Size";
case CL_INVALID_DEVICE_QUEUE: return "Invalid Device Queue";
#endif
diff --git a/boost/compute/function.hpp b/boost/compute/function.hpp
index b0b893e948..a731e18751 100644
--- a/boost/compute/function.hpp
+++ b/boost/compute/function.hpp
@@ -164,6 +164,19 @@ public:
m_definitions[name] = value;
}
+ bool operator==(const function<Signature>& other) const
+ {
+ return
+ (m_name == other.m_name)
+ && (m_definitions == other.m_definitions)
+ && (m_source == other.m_source);
+ }
+
+ bool operator!=(const function<Signature>& other) const
+ {
+ return !(*this == other);
+ }
+
/// \internal_
detail::invoked_function<result_type, boost::tuple<> >
operator()() const
diff --git a/boost/compute/functional/atomic.hpp b/boost/compute/functional/atomic.hpp
index 2701561bc3..ac4ac70d7f 100644
--- a/boost/compute/functional/atomic.hpp
+++ b/boost/compute/functional/atomic.hpp
@@ -15,7 +15,7 @@
#include <boost/compute/function.hpp>
#ifndef BOOST_COMPUTE_DOXYGEN_INVOKED
-#ifdef CL_VERSION_1_1
+#ifdef BOOST_COMPUTE_CL_VERSION_1_1
#define BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "atomic_"
#else
#define BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "atom_"
diff --git a/boost/compute/image/image1d.hpp b/boost/compute/image/image1d.hpp
index 2d71934ab4..0f146be089 100644
--- a/boost/compute/image/image1d.hpp
+++ b/boost/compute/image/image1d.hpp
@@ -50,7 +50,7 @@ public:
cl_mem_flags flags = read_write,
void *host_ptr = 0)
{
- #ifdef CL_VERSION_1_2
+ #ifdef BOOST_COMPUTE_CL_VERSION_1_2
cl_image_desc desc;
desc.image_type = CL_MEM_OBJECT_IMAGE1D;
desc.image_width = image_width;
@@ -61,7 +61,7 @@ public:
desc.image_slice_pitch = 0;
desc.num_mip_levels = 0;
desc.num_samples = 0;
- #ifdef CL_VERSION_2_0
+ #ifdef BOOST_COMPUTE_CL_VERSION_2_0
desc.mem_object = 0;
#else
desc.buffer = 0;
@@ -151,7 +151,7 @@ public:
static std::vector<image_format>
get_supported_formats(const context &context, cl_mem_flags flags = read_write)
{
- #ifdef CL_VERSION_1_2
+ #ifdef BOOST_COMPUTE_CL_VERSION_1_2
return image_object::get_supported_formats(context, CL_MEM_OBJECT_IMAGE1D, flags);
#else
return std::vector<image_format>();
@@ -164,7 +164,7 @@ public:
const context &context,
cl_mem_flags flags = read_write)
{
- #ifdef CL_VERSION_1_2
+ #ifdef BOOST_COMPUTE_CL_VERSION_1_2
return image_object::is_supported_format(
format, context, CL_MEM_OBJECT_IMAGE1D, flags
);
diff --git a/boost/compute/image/image2d.hpp b/boost/compute/image/image2d.hpp
index c203a9417f..edfe98de5a 100644
--- a/boost/compute/image/image2d.hpp
+++ b/boost/compute/image/image2d.hpp
@@ -58,7 +58,7 @@ public:
{
cl_int error = 0;
- #ifdef CL_VERSION_1_2
+ #ifdef BOOST_COMPUTE_CL_VERSION_1_2
cl_image_desc desc;
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
desc.image_width = image_width;
@@ -69,7 +69,7 @@ public:
desc.image_slice_pitch = 0;
desc.num_mip_levels = 0;
desc.num_samples = 0;
- #ifdef CL_VERSION_2_0
+ #ifdef BOOST_COMPUTE_CL_VERSION_2_0
desc.mem_object = 0;
#else
desc.buffer = 0;
@@ -108,7 +108,7 @@ public:
{
cl_int error = 0;
- #ifdef CL_VERSION_1_2
+ #ifdef BOOST_COMPUTE_CL_VERSION_1_2
cl_image_desc desc;
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
desc.image_width = image_width;
@@ -119,7 +119,7 @@ public:
desc.image_slice_pitch = 0;
desc.num_mip_levels = 0;
desc.num_samples = 0;
- #ifdef CL_VERSION_2_0
+ #ifdef BOOST_COMPUTE_CL_VERSION_2_0
desc.mem_object = 0;
#else
desc.buffer = 0;
diff --git a/boost/compute/image/image3d.hpp b/boost/compute/image/image3d.hpp
index 9463cfaa16..5569f2a0b9 100644
--- a/boost/compute/image/image3d.hpp
+++ b/boost/compute/image/image3d.hpp
@@ -54,7 +54,7 @@ public:
{
cl_int error = 0;
- #ifdef CL_VERSION_1_2
+ #ifdef BOOST_COMPUTE_CL_VERSION_1_2
cl_image_desc desc;
desc.image_type = CL_MEM_OBJECT_IMAGE3D;
desc.image_width = image_width;
@@ -65,7 +65,7 @@ public:
desc.image_slice_pitch = image_slice_pitch;
desc.num_mip_levels = 0;
desc.num_samples = 0;
- #ifdef CL_VERSION_2_0
+ #ifdef BOOST_COMPUTE_CL_VERSION_2_0
desc.mem_object = 0;
#else
desc.buffer = 0;
@@ -108,7 +108,7 @@ public:
{
cl_int error = 0;
- #ifdef CL_VERSION_1_2
+ #ifdef BOOST_COMPUTE_CL_VERSION_1_2
cl_image_desc desc;
desc.image_type = CL_MEM_OBJECT_IMAGE3D;
desc.image_width = image_width;
@@ -119,7 +119,7 @@ public:
desc.image_slice_pitch = image_slice_pitch;
desc.num_mip_levels = 0;
desc.num_samples = 0;
- #ifdef CL_VERSION_2_0
+ #ifdef BOOST_COMPUTE_CL_VERSION_2_0
desc.mem_object = 0;
#else
desc.buffer = 0;
diff --git a/boost/compute/image/image_sampler.hpp b/boost/compute/image/image_sampler.hpp
index 4f1bfe9b86..26f20aa9d5 100644
--- a/boost/compute/image/image_sampler.hpp
+++ b/boost/compute/image/image_sampler.hpp
@@ -55,7 +55,7 @@ public:
{
cl_int error = 0;
- #ifdef CL_VERSION_2_0
+ #ifdef BOOST_COMPUTE_CL_VERSION_2_0
std::vector<cl_sampler_properties> sampler_properties;
sampler_properties.push_back(CL_SAMPLER_NORMALIZED_COORDS);
sampler_properties.push_back(cl_sampler_properties(normalized_coords));
diff --git a/boost/compute/interop/opengl/context.hpp b/boost/compute/interop/opengl/context.hpp
index c35fedddfa..077c86036b 100644
--- a/boost/compute/interop/opengl/context.hpp
+++ b/boost/compute/interop/opengl/context.hpp
@@ -75,6 +75,10 @@ inline context opengl_create_shared_context()
for(size_t i = 0; i < platforms.size(); i++){
const platform &platform = platforms[i];
+ // check whether this platform supports OpenCL/OpenGL sharing
+ if (!platform.supports_extension(cl_gl_sharing_extension))
+ continue;
+
// load clGetGLContextInfoKHR() extension function
GetGLContextInfoKHRFunction GetGLContextInfoKHR =
reinterpret_cast<GetGLContextInfoKHRFunction>(
@@ -92,7 +96,7 @@ inline context opengl_create_shared_context()
#if defined(__linux__)
CL_GL_CONTEXT_KHR, (cl_context_properties) glXGetCurrentContext(),
CL_GLX_DISPLAY_KHR, (cl_context_properties) glXGetCurrentDisplay(),
- #elif defined(WIN32)
+ #elif defined(_WIN32)
CL_GL_CONTEXT_KHR, (cl_context_properties) wglGetCurrentContext(),
CL_WGL_HDC_KHR, (cl_context_properties) wglGetCurrentDC(),
#endif
diff --git a/boost/compute/interop/opengl/opengl_texture.hpp b/boost/compute/interop/opengl/opengl_texture.hpp
index c1f3f4f441..ae095d22a9 100644
--- a/boost/compute/interop/opengl/opengl_texture.hpp
+++ b/boost/compute/interop/opengl/opengl_texture.hpp
@@ -51,7 +51,7 @@ public:
{
cl_int error = 0;
- #ifdef CL_VERSION_1_2
+ #ifdef BOOST_COMPUTE_CL_VERSION_1_2
m_mem = clCreateFromGLTexture(context,
flags,
texture_target,
diff --git a/boost/compute/iterator/buffer_iterator.hpp b/boost/compute/iterator/buffer_iterator.hpp
index cd68058f64..de2395a552 100644
--- a/boost/compute/iterator/buffer_iterator.hpp
+++ b/boost/compute/iterator/buffer_iterator.hpp
@@ -58,13 +58,20 @@ struct buffer_iterator_index_expr
size_t index,
const memory_object::address_space address_space,
const IndexExpr &expr)
- : m_buffer(buffer),
+ : m_buffer(buffer.get(), false),
m_index(index),
m_address_space(address_space),
m_expr(expr)
{
}
+ ~buffer_iterator_index_expr()
+ {
+ // set buffer to null so that its reference count will
+ // not be decremented when its destructor is called
+ m_buffer.get() = 0;
+ }
+
operator T() const
{
BOOST_STATIC_ASSERT_MSG(boost::is_integral<IndexExpr>::value,
@@ -73,10 +80,10 @@ struct buffer_iterator_index_expr
return buffer_value<T>(m_buffer, size_t(m_expr) * sizeof(T));
}
- const buffer &m_buffer;
- size_t m_index;
- memory_object::address_space m_address_space;
- IndexExpr m_expr;
+ const buffer m_buffer;
+ const size_t m_index;
+ const memory_object::address_space m_address_space;
+ const IndexExpr m_expr;
};
template<class T, class IndexExpr>
diff --git a/boost/compute/iterator/counting_iterator.hpp b/boost/compute/iterator/counting_iterator.hpp
index 304c1e05cf..384486bcb3 100644
--- a/boost/compute/iterator/counting_iterator.hpp
+++ b/boost/compute/iterator/counting_iterator.hpp
@@ -47,14 +47,14 @@ struct counting_iterator_index_expr
{
typedef T result_type;
- counting_iterator_index_expr(const T &init, const IndexExpr &expr)
+ counting_iterator_index_expr(const T init, const IndexExpr &expr)
: m_init(init),
m_expr(expr)
{
}
- const T &m_init;
- IndexExpr m_expr;
+ const T m_init;
+ const IndexExpr m_expr;
};
template<class T, class IndexExpr>
diff --git a/boost/compute/iterator/function_input_iterator.hpp b/boost/compute/iterator/function_input_iterator.hpp
index bd89b6c0fc..bb8f0d5d09 100644
--- a/boost/compute/iterator/function_input_iterator.hpp
+++ b/boost/compute/iterator/function_input_iterator.hpp
@@ -53,7 +53,7 @@ struct function_input_iterator_expr
{
}
- Function m_function;
+ const Function m_function;
};
template<class Function>
diff --git a/boost/compute/iterator/permutation_iterator.hpp b/boost/compute/iterator/permutation_iterator.hpp
index 8a7f97a402..a0d2dbfbc5 100644
--- a/boost/compute/iterator/permutation_iterator.hpp
+++ b/boost/compute/iterator/permutation_iterator.hpp
@@ -60,9 +60,9 @@ struct permutation_iterator_access_expr
{
}
- ElementIterator m_element_iter;
- IndexIterator m_index_iter;
- IndexExpr m_expr;
+ const ElementIterator m_element_iter;
+ const IndexIterator m_index_iter;
+ const IndexExpr m_expr;
};
template<class ElementIterator, class IndexIterator, class IndexExpr>
diff --git a/boost/compute/iterator/strided_iterator.hpp b/boost/compute/iterator/strided_iterator.hpp
index 52e7f07bd8..eb342dc33d 100644
--- a/boost/compute/iterator/strided_iterator.hpp
+++ b/boost/compute/iterator/strided_iterator.hpp
@@ -56,8 +56,8 @@ struct stride_expr
{
}
- IndexExpr m_index_expr;
- Stride m_stride;
+ const IndexExpr m_index_expr;
+ const Stride m_stride;
};
template<class IndexExpr, class Stride>
@@ -90,9 +90,9 @@ struct strided_iterator_index_expr
{
}
- Iterator m_input_iter;
- const Stride& m_stride;
- IndexExpr m_index_expr;
+ const Iterator m_input_iter;
+ const Stride m_stride;
+ const IndexExpr m_index_expr;
};
template<class Iterator, class Stride, class IndexExpr>
diff --git a/boost/compute/iterator/transform_iterator.hpp b/boost/compute/iterator/transform_iterator.hpp
index c040922f9d..08ff6ee4d4 100644
--- a/boost/compute/iterator/transform_iterator.hpp
+++ b/boost/compute/iterator/transform_iterator.hpp
@@ -76,9 +76,9 @@ struct transform_iterator_index_expr
{
}
- InputIterator m_input_iter;
- UnaryFunction m_transform_expr;
- IndexExpr m_index_expr;
+ const InputIterator m_input_iter;
+ const UnaryFunction m_transform_expr;
+ const IndexExpr m_index_expr;
};
template<class InputIterator, class UnaryFunction, class IndexExpr>
diff --git a/boost/compute/iterator/zip_iterator.hpp b/boost/compute/iterator/zip_iterator.hpp
index 2860d73a93..a4af6a3034 100644
--- a/boost/compute/iterator/zip_iterator.hpp
+++ b/boost/compute/iterator/zip_iterator.hpp
@@ -92,8 +92,8 @@ struct zip_iterator_index_expr
{
}
- IteratorTuple m_iterators;
- IndexExpr m_index_expr;
+ const IteratorTuple m_iterators;
+ const IndexExpr m_index_expr;
};
/// \internal_
diff --git a/boost/compute/kernel.hpp b/boost/compute/kernel.hpp
index 72f21a0378..097cba692e 100644
--- a/boost/compute/kernel.hpp
+++ b/boost/compute/kernel.hpp
@@ -168,7 +168,7 @@ public:
typename detail::get_object_info_type<kernel, Enum>::type
get_info() const;
- #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+ #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
/// Returns information about the argument at \p index.
///
/// For example, to get the name of the first argument:
@@ -197,7 +197,7 @@ public:
template<int Enum>
typename detail::get_object_info_type<kernel, Enum>::type
get_arg_info(size_t index) const;
- #endif // CL_VERSION_1_2
+ #endif // BOOST_COMPUTE_CL_VERSION_1_2
/// Returns work-group information for the kernel with \p device.
///
@@ -266,7 +266,7 @@ public:
/// \internal_
void set_arg_svm_ptr(size_t index, void* ptr)
{
- #ifdef CL_VERSION_2_0
+ #ifdef BOOST_COMPUTE_CL_VERSION_2_0
cl_int ret = clSetKernelArgSVMPointer(m_kernel, static_cast<cl_uint>(index), ptr);
if(ret != CL_SUCCESS){
BOOST_THROW_EXCEPTION(opencl_error(ret));
@@ -289,7 +289,7 @@ public:
}
#endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
- #if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+ #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
/// Sets additional execution information for the kernel.
///
/// \opencl_version_warning{2,0}
@@ -302,7 +302,7 @@ public:
BOOST_THROW_EXCEPTION(opencl_error(ret));
}
}
- #endif // CL_VERSION_2_0
+ #endif // BOOST_COMPUTE_CL_VERSION_2_0
/// Returns \c true if the kernel is the same at \p other.
bool operator==(const kernel &other) const
@@ -365,14 +365,14 @@ BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel,
((cl_program, CL_KERNEL_PROGRAM))
)
-#ifdef CL_VERSION_1_2
+#ifdef BOOST_COMPUTE_CL_VERSION_1_2
BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel,
((std::string, CL_KERNEL_ATTRIBUTES))
)
-#endif // CL_VERSION_1_2
+#endif // BOOST_COMPUTE_CL_VERSION_1_2
/// \internal_ define get_arg_info() specializations for kernel
-#ifdef CL_VERSION_1_2
+#ifdef BOOST_COMPUTE_CL_VERSION_1_2
#define BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(result_type, value) \
namespace detail { \
template<> struct get_object_info_type<kernel, value> { typedef result_type type; }; \
@@ -386,7 +386,7 @@ BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(cl_kernel_arg_acc
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(std::string, CL_KERNEL_ARG_TYPE_NAME)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(cl_kernel_arg_type_qualifier, CL_KERNEL_ARG_TYPE_QUALIFIER)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(std::string, CL_KERNEL_ARG_NAME)
-#endif // CL_VERSION_1_2
+#endif // BOOST_COMPUTE_CL_VERSION_1_2
namespace detail {
diff --git a/boost/compute/lambda/context.hpp b/boost/compute/lambda/context.hpp
index ed25b79475..a7248d4639 100644
--- a/boost/compute/lambda/context.hpp
+++ b/boost/compute/lambda/context.hpp
@@ -78,6 +78,41 @@ struct context : proto::callable_context<context<Args> >
stream << stream.lit(x);
}
+ // handle uchar_ terminals: emits "(uchar)(<x>u)"; x is converted to uint_
+ // before formatting (presumably so it is written as a number rather than
+ // as a character -- confirm against stream.lit)
+ void operator()(proto::tag::terminal, const uchar_ &x)
+ {
+ stream << "(uchar)(" << stream.lit(uint_(x)) << "u)";
+ }
+
+ // handle char_ terminals: emits "(char)(<x>)"; x is converted to int_
+ // before formatting
+ void operator()(proto::tag::terminal, const char_ &x)
+ {
+ stream << "(char)(" << stream.lit(int_(x)) << ")";
+ }
+
+ // handle ushort_ terminals: emits "(ushort)(<x>u)"
+ void operator()(proto::tag::terminal, const ushort_ &x)
+ {
+ stream << "(ushort)(" << stream.lit(x) << "u)";
+ }
+
+ // handle short_ terminals: emits "(short)(<x>)"
+ void operator()(proto::tag::terminal, const short_ &x)
+ {
+ stream << "(short)(" << stream.lit(x) << ")";
+ }
+
+ // handle uint_ terminals: emits "(<x>u)"
+ void operator()(proto::tag::terminal, const uint_ &x)
+ {
+ stream << "(" << stream.lit(x) << "u)";
+ }
+
+ // handle ulong_ terminals: emits "(<x>ul)"
+ void operator()(proto::tag::terminal, const ulong_ &x)
+ {
+ stream << "(" << stream.lit(x) << "ul)";
+ }
+
+ // handle long_ terminals: emits "(<x>l)"
+ void operator()(proto::tag::terminal, const long_ &x)
+ {
+ stream << "(" << stream.lit(x) << "l)";
+ }
+
// handle placeholders
template<int I>
void operator()(proto::tag::terminal, placeholder<I>)
diff --git a/boost/compute/lambda/functional.hpp b/boost/compute/lambda/functional.hpp
index dd7190e4d9..42e75e4991 100644
--- a/boost/compute/lambda/functional.hpp
+++ b/boost/compute/lambda/functional.hpp
@@ -22,6 +22,11 @@
#include <boost/compute/lambda/result_of.hpp>
#include <boost/compute/lambda/placeholder.hpp>
+#include <boost/compute/types/fundamental.hpp>
+#include <boost/compute/type_traits/scalar_type.hpp>
+#include <boost/compute/type_traits/vector_size.hpp>
+#include <boost/compute/type_traits/make_vector_type.hpp>
+
namespace boost {
namespace compute {
namespace lambda {
@@ -29,7 +34,8 @@ namespace lambda {
namespace mpl = boost::mpl;
namespace proto = boost::proto;
-// wraps a unary boolean function
+// wraps a unary boolean function whose result type is an int_ when the argument
+// type is a scalar, and intN_ if the argument type is a vector of size N
#define BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(name) \
namespace detail { \
struct BOOST_PP_CAT(name, _func) \
@@ -37,7 +43,12 @@ namespace proto = boost::proto;
template<class Expr, class Args> \
struct lambda_result \
{ \
- typedef int type; \
+ typedef typename proto::result_of::child_c<Expr, 1>::type Arg; \
+ typedef typename ::boost::compute::lambda::result_of<Arg, Args>::type result_type; \
+ typedef typename ::boost::compute::make_vector_type< \
+ ::boost::compute::int_, \
+ ::boost::compute::vector_size<result_type>::value \
+ >::type type; \
}; \
\
template<class Context, class Arg> \
@@ -60,7 +71,7 @@ namespace proto = boost::proto;
); \
}
-// wraps a unary function who's return type is the same as the argument type
+// wraps a unary function whose return type is the same as the argument type
#define BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(name) \
namespace detail { \
struct BOOST_PP_CAT(name, _func) \
@@ -92,7 +103,79 @@ namespace proto = boost::proto;
); \
}
-// wraps a binary function
+// wraps a unary function whose result type is the scalar type of the first argument
+//
+// Expands to a detail::<name>_func functor and a free function <name>(arg)
+// that builds a proto function expression holding the functor and a
+// reference to arg. When the expression is evaluated, apply() streams
+// "<name>(" followed by the evaluated argument and ")" to ctx.stream.
+#define BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_ST(name) \
+ namespace detail { \
+ struct BOOST_PP_CAT(name, _func) \
+ { \
+ template<class Expr, class Args> \
+ struct lambda_result \
+ { \
+ typedef typename proto::result_of::child_c<Expr, 1>::type Arg; \
+ typedef typename ::boost::compute::lambda::result_of<Arg, Args>::type result_type; \
+ typedef typename ::boost::compute::scalar_type<result_type>::type type; \
+ }; \
+ \
+ template<class Context, class Arg> \
+ static void apply(Context &ctx, const Arg &arg) \
+ { \
+ ctx.stream << #name << "("; \
+ proto::eval(arg, ctx); \
+ ctx.stream << ")"; \
+ } \
+ }; \
+ } \
+ template<class Arg> \
+ inline typename proto::result_of::make_expr< \
+ proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg& \
+ >::type const \
+ name(const Arg &arg) \
+ { \
+ return proto::make_expr<proto::tag::function>( \
+ BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg) \
+ ); \
+ }
+
+// wraps a binary boolean function whose result type is an int_ when the first
+// argument type is a scalar, and intN_ if the first argument type is a vector
+// of size N
+//
+// The vector size is taken from the evaluated type of the first argument
+// (lambda::result_of), not from the proto expression type of the child, so
+// that vector arguments yield intN_ just like the unary boolean wrapper.
+//
+// Expands to a detail::<name>_func functor and a free function
+// <name>(arg1, arg2); when evaluated, apply() streams
+// "<name>(<arg1>, <arg2>)" to ctx.stream.
+#define BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(name) \
+ namespace detail { \
+ struct BOOST_PP_CAT(name, _func) \
+ { \
+ template<class Expr, class Args> \
+ struct lambda_result \
+ { \
+ typedef typename proto::result_of::child_c<Expr, 1>::type Arg1; \
+ typedef typename ::boost::compute::lambda::result_of<Arg1, Args>::type result_type; \
+ typedef typename ::boost::compute::make_vector_type< \
+ ::boost::compute::int_, \
+ ::boost::compute::vector_size<result_type>::value \
+ >::type type; \
+ }; \
+ \
+ template<class Context, class Arg1, class Arg2> \
+ static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2) \
+ { \
+ ctx.stream << #name << "("; \
+ proto::eval(arg1, ctx); \
+ ctx.stream << ", "; \
+ proto::eval(arg2, ctx); \
+ ctx.stream << ")"; \
+ } \
+ }; \
+ } \
+ template<class Arg1, class Arg2> \
+ inline typename proto::result_of::make_expr< \
+ proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg1&, const Arg2& \
+ >::type const \
+ name(const Arg1 &arg1, const Arg2 &arg2) \
+ { \
+ return proto::make_expr<proto::tag::function>( \
+ BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg1), ::boost::ref(arg2) \
+ ); \
+ }
+
+// wraps a binary function whose result type is the type of the first argument
#define BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(name) \
namespace detail { \
struct BOOST_PP_CAT(name, _func) \
@@ -126,6 +209,40 @@ namespace proto = boost::proto;
); \
}
+// wraps a binary function whose result type is the type of the second argument
+//
+// Expands to a detail::<name>_func functor and a free function
+// <name>(arg1, arg2) that builds a proto function expression holding the
+// functor and references to both arguments. When evaluated, apply() streams
+// "<name>(<arg1>, <arg2>)" to ctx.stream.
+#define BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_2(name) \
+ namespace detail { \
+ struct BOOST_PP_CAT(name, _func) \
+ { \
+ template<class Expr, class Args> \
+ struct lambda_result \
+ { \
+ typedef typename proto::result_of::child_c<Expr, 2>::type Arg2; \
+ typedef typename ::boost::compute::lambda::result_of<Arg2, Args>::type type; \
+ }; \
+ \
+ template<class Context, class Arg1, class Arg2> \
+ static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2) \
+ { \
+ ctx.stream << #name << "("; \
+ proto::eval(arg1, ctx); \
+ ctx.stream << ", "; \
+ proto::eval(arg2, ctx); \
+ ctx.stream << ")"; \
+ } \
+ }; \
+ } \
+ template<class Arg1, class Arg2> \
+ inline typename proto::result_of::make_expr< \
+ proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg1&, const Arg2& \
+ >::type const \
+ name(const Arg1 &arg1, const Arg2 &arg2) \
+ { \
+ return proto::make_expr<proto::tag::function>( \
+ BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg1), ::boost::ref(arg2) \
+ ); \
+ }
+
// wraps a binary function who's result type is the scalar type of the first argument
#define BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_ST(name) \
namespace detail { \
@@ -161,6 +278,41 @@ namespace proto = boost::proto;
); \
}
+// wraps a binary function whose result type is the type of the first argument
+// and the second argument is a pointer
+//
+// Expands to a detail::<name>_func functor and a free function
+// <name>(arg1, arg2). When evaluated, apply() streams
+// "<name>(<arg1>, &<arg2>)" to ctx.stream, i.e. the address-of operator is
+// applied to whatever the second argument evaluates to in the kernel source.
+// NOTE(review): this presumably requires arg2 to evaluate to an lvalue in the
+// generated code -- confirm with the callers.
+#define BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_PTR(name) \
+ namespace detail { \
+ struct BOOST_PP_CAT(name, _func) \
+ { \
+ template<class Expr, class Args> \
+ struct lambda_result \
+ { \
+ typedef typename proto::result_of::child_c<Expr, 1>::type Arg1; \
+ typedef typename ::boost::compute::lambda::result_of<Arg1, Args>::type type; \
+ }; \
+ \
+ template<class Context, class Arg1, class Arg2> \
+ static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2) \
+ { \
+ ctx.stream << #name << "("; \
+ proto::eval(arg1, ctx); \
+ ctx.stream << ", &"; \
+ proto::eval(arg2, ctx); \
+ ctx.stream << ")"; \
+ } \
+ }; \
+ } \
+ template<class Arg1, class Arg2> \
+ inline typename proto::result_of::make_expr< \
+ proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg1&, const Arg2& \
+ >::type const \
+ name(const Arg1 &arg1, const Arg2 &arg2) \
+ { \
+ return proto::make_expr<proto::tag::function>( \
+ BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg1), ::boost::ref(arg2) \
+ ); \
+ }
+
// wraps a ternary function
#define BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(name) \
namespace detail { \
@@ -197,43 +349,246 @@ namespace proto = boost::proto;
); \
}
+// wraps a ternary function whose result type is the type of the third argument
+//
+// Expands to a detail::<name>_func functor and a free function
+// <name>(arg1, arg2, arg3) that builds a proto function expression holding
+// the functor and references to all three arguments. When evaluated, apply()
+// streams "<name>(<arg1>, <arg2>, <arg3>)" to ctx.stream.
+#define BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION_3(name) \
+ namespace detail { \
+ struct BOOST_PP_CAT(name, _func) \
+ { \
+ template<class Expr, class Args> \
+ struct lambda_result \
+ { \
+ typedef typename proto::result_of::child_c<Expr, 3>::type Arg3; \
+ typedef typename ::boost::compute::lambda::result_of<Arg3, Args>::type type; \
+ }; \
+ \
+ template<class Context, class Arg1, class Arg2, class Arg3> \
+ static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) \
+ { \
+ ctx.stream << #name << "("; \
+ proto::eval(arg1, ctx); \
+ ctx.stream << ", "; \
+ proto::eval(arg2, ctx); \
+ ctx.stream << ", "; \
+ proto::eval(arg3, ctx); \
+ ctx.stream << ")"; \
+ } \
+ }; \
+ } \
+ template<class Arg1, class Arg2, class Arg3> \
+ inline typename proto::result_of::make_expr< \
+ proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg1&, const Arg2&, const Arg3& \
+ >::type const \
+ name(const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) \
+ { \
+ return proto::make_expr<proto::tag::function>( \
+ BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg1), ::boost::ref(arg2), ::boost::ref(arg3) \
+ ); \
+ }
-BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(all)
-BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(any)
-BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isinf)
-BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isnan)
-BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isfinite)
+// wraps a ternary function whose result type is the type of the first argument
+// and the third argument of the function is a pointer
+//
+// lambda_result is derived from child 1 (the first argument); the third
+// argument is only an output pointer and must not determine the result type.
+//
+// Expands to a detail::<name>_func functor and a free function
+// <name>(arg1, arg2, arg3); when evaluated, apply() streams
+// "<name>(<arg1>, <arg2>, &<arg3>)" to ctx.stream.
+#define BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION_PTR(name) \
+ namespace detail { \
+ struct BOOST_PP_CAT(name, _func) \
+ { \
+ template<class Expr, class Args> \
+ struct lambda_result \
+ { \
+ typedef typename proto::result_of::child_c<Expr, 1>::type Arg1; \
+ typedef typename ::boost::compute::lambda::result_of<Arg1, Args>::type type; \
+ }; \
+ \
+ template<class Context, class Arg1, class Arg2, class Arg3> \
+ static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) \
+ { \
+ ctx.stream << #name << "("; \
+ proto::eval(arg1, ctx); \
+ ctx.stream << ", "; \
+ proto::eval(arg2, ctx); \
+ ctx.stream << ", &"; \
+ proto::eval(arg3, ctx); \
+ ctx.stream << ")"; \
+ } \
+ }; \
+ } \
+ template<class Arg1, class Arg2, class Arg3> \
+ inline typename proto::result_of::make_expr< \
+ proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg1&, const Arg2&, const Arg3& \
+ >::type const \
+ name(const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) \
+ { \
+ return proto::make_expr<proto::tag::function>( \
+ BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg1), ::boost::ref(arg2), ::boost::ref(arg3) \
+ ); \
+ }
+// Common Built-In Functions
+// Each line instantiates a lambda wrapper named after the built-in it emits.
+// step takes its result type from the second argument and smoothstep from
+// the third (see the _2 / _3 wrapper macros).
+BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(clamp)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(degrees)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(min)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(max)
+BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(mix)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(radians)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sign)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_2(step)
+BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION_3(smoothstep)
+
+// Geometric Built-In Functions
+// The _ST wrappers (dot, distance, length and their fast_ variants) report
+// the scalar type of their first argument as the result type; cross and the
+// normalize variants report the type of the first argument itself.
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(cross)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_ST(dot)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_ST(distance)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_ST(length)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(normalize)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_ST(fast_distance)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_ST(fast_length)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(fast_normalize)
+
+// Integer Built-In Functions
+// Wrappers guarded by BOOST_COMPUTE_CL_VERSION_* are only generated when the
+// corresponding macro is defined.
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(abs)
-BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(cos)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(abs_diff)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(add_sat)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(hadd)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(rhadd)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(clz)
+#ifdef BOOST_COMPUTE_CL_VERSION_2_0
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(ctz)
+#endif
+// clamp() (since 1.1) already defined in common
+BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(mad_hi)
+BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(mad24)
+BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(mad_sat)
+// max() and min() functions are defined in common
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(mul_hi)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(mul24)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(rotate)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(sub_sat)
+// NOTE(review): the binary wrapper reports the first argument's type as the
+// result, while OpenCL's upsample() returns a type twice as wide -- confirm
+// whether a dedicated wrapper is needed.
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(upsample)
+#ifdef BOOST_COMPUTE_CL_VERSION_1_2
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(popcount)
+#endif
+
+// Math Built-In Functions
+// Each wrapper is chosen to match the arity of the OpenCL C built-in it emits:
+// ilogb(x) is unary and returns intN_, rootn(x, n) is binary, and
+// sincos(x, &cosval) takes an output pointer as its second argument.
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(acos)
-BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sin)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(acosh)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(acospi)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(asin)
-BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(tan)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(asinh)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(asinpi)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(atan)
-BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sqrt)
-BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(rsqrt)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(atan2)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(atanh)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(atanpi)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(atan2pi)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(cbrt)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(ceil)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(copysign)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(cos)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(cosh)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(cospi)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(erfc)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(erf)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(exp)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(exp2)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(exp10)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(expm1)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(fabs)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(fdim)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(floor)
+BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(fma)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(fmax)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(fmin)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(fmod)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_PTR(fract)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_PTR(frexp)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(hypot)
+BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(ilogb) // ilogb returns intN_
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(ldexp)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(lgamma)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_PTR(lgamma_r)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(log)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(log2)
BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(log10)
-BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(round)
-BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(length)
-
-BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(cross)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(log1p)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(logb)
+BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(mad)
+#ifdef BOOST_COMPUTE_CL_VERSION_1_1
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(maxmag)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(minmag)
+#endif
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_PTR(modf)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(nan) // NOTE(review): OpenCL nan() maps uintN to floatN; result type here is the argument type -- confirm
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(nextafter)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(pow)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(pown)
BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(powr)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(remainder)
+BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION_PTR(remquo)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(rint)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(rootn)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(round)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(rsqrt)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sin)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_PTR(sincos)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sinh)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sinpi)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sqrt)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(tan)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(tanh)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(tanpi)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(tgamma)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(trunc)
-BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_ST(dot)
-BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_ST(distance)
+// Native Math Built-In Functions
+// Wrappers for the native_-prefixed built-ins; native_divide and native_powr
+// take two arguments, all others take one.
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_cos)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(native_divide)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_exp)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_exp2)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_exp10)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_log)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_log2)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_log10)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(native_powr)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_recip)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_rsqrt)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_sin)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_sqrt)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_tan)
-BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(clamp)
-BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(fma)
-BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(mad)
-BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(smoothstep)
+// Half Math Built-In Functions
+// Wrappers for the half_-prefixed built-ins; half_divide and half_powr take
+// two arguments, all others take one.
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_cos)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(half_divide)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_exp)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_exp2)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_exp10)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_log)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_log2)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_log10)
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(half_powr)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_recip)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_rsqrt)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_sin)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_sqrt)
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_tan)
+
+// Relational Built-In Functions
+// The boolean wrappers report int_ / intN_ as their result type; the wrapper
+// name is emitted verbatim as the OpenCL function name, so it must match the
+// built-in exactly (signbit, not singbit).
+BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(isequal)
+BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(isnotequal)
+BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(isgreater)
+BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(isgreaterequal)
+BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(isless)
+BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(islessequal)
+BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(islessgreater)
+BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isfinite)
+BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isinf)
+BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isnan)
+BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isnormal)
+BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(isordered)
+BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(isunordered)
+BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(signbit)
+BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(all)
+BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(any)
+BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(bitselect)
+BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(select)
} // end lambda namespace
} // end compute namespace
diff --git a/boost/compute/memory/svm_ptr.hpp b/boost/compute/memory/svm_ptr.hpp
index 0c9d88035c..c8753f5b34 100644
--- a/boost/compute/memory/svm_ptr.hpp
+++ b/boost/compute/memory/svm_ptr.hpp
@@ -29,7 +29,7 @@ template<class T>
class svm_ptr;
// svm functions require OpenCL 2.0
-#if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+#if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
namespace detail {
template<class T, class IndexExpr>
@@ -131,8 +131,18 @@ public:
return m_context;
}
+ /// Returns \c true if \p other has the same context and holds the same
+ /// pointer as this svm_ptr.
+ bool operator==(const svm_ptr<T>& other) const
+ {
+ if(!(other.m_context == m_context)){
+ return false;
+ }
+ return m_ptr == other.m_ptr;
+ }
+
+ /// Returns \c true if \p other has a different context or holds a
+ /// different pointer than this svm_ptr.
+ bool operator!=(const svm_ptr<T>& other) const
+ {
+ if(other.m_context != m_context){
+ return true;
+ }
+ return m_ptr != other.m_ptr;
+ }
+
// svm functions require OpenCL 2.0
- #if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+ #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
/// \internal_
template<class Expr>
detail::svm_ptr_index_expr<T, Expr>
diff --git a/boost/compute/memory_object.hpp b/boost/compute/memory_object.hpp
index 14c4cf4c7e..75c8738f8f 100644
--- a/boost/compute/memory_object.hpp
+++ b/boost/compute/memory_object.hpp
@@ -38,7 +38,7 @@ public:
use_host_ptr = CL_MEM_USE_HOST_PTR,
alloc_host_ptr = CL_MEM_ALLOC_HOST_PTR,
copy_host_ptr = CL_MEM_COPY_HOST_PTR
- #ifdef CL_VERSION_1_2
+ #ifdef BOOST_COMPUTE_CL_VERSION_1_2
,
host_write_only = CL_MEM_HOST_WRITE_ONLY,
host_read_only = CL_MEM_HOST_READ_ONLY,
@@ -105,7 +105,7 @@ public:
return detail::get_object_info<T>(clGetMemObjectInfo, m_mem, info);
}
- #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+ #if defined(BOOST_COMPUTE_CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
/// Registers a function to be called when the memory object is deleted
/// and its resources freed.
///
@@ -137,7 +137,7 @@ public:
new boost::function<void()>(callback)
);
}
- #endif // CL_VERSION_1_1
+ #endif // BOOST_COMPUTE_CL_VERSION_1_1
/// Returns \c true if the memory object is the same as \p other.
bool operator==(const memory_object &other) const
@@ -152,7 +152,7 @@ public:
}
private:
- #ifdef CL_VERSION_1_1
+ #ifdef BOOST_COMPUTE_CL_VERSION_1_1
/// \internal_
static void BOOST_COMPUTE_CL_CALLBACK
destructor_callback_invoker(cl_mem, void *user_data)
@@ -164,7 +164,7 @@ private:
delete callback;
}
- #endif // CL_VERSION_1_1
+ #endif // BOOST_COMPUTE_CL_VERSION_1_1
protected:
/// \internal_
diff --git a/boost/compute/pipe.hpp b/boost/compute/pipe.hpp
index 944674e622..907750a3ba 100644
--- a/boost/compute/pipe.hpp
+++ b/boost/compute/pipe.hpp
@@ -18,7 +18,7 @@
#include <boost/compute/detail/get_object_info.hpp>
// pipe objects require opencl 2.0
-#if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+#if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
namespace boost {
namespace compute {
@@ -149,6 +149,6 @@ struct set_kernel_arg<pipe>
} // end compute namespace
} // end boost namespace
-#endif // CL_VERSION_2_0
+#endif // BOOST_COMPUTE_CL_VERSION_2_0
#endif // BOOST_COMPUTE_PIPE_HPP
diff --git a/boost/compute/platform.hpp b/boost/compute/platform.hpp
index 65fda84e11..fddfec8c11 100644
--- a/boost/compute/platform.hpp
+++ b/boost/compute/platform.hpp
@@ -181,7 +181,7 @@ public:
/// function. Returns \c 0 if \p function_name is invalid.
void* get_extension_function_address(const char *function_name) const
{
- #ifdef CL_VERSION_1_2
+ #ifdef BOOST_COMPUTE_CL_VERSION_1_2
return clGetExtensionFunctionAddressForPlatform(m_platform,
function_name);
#else
@@ -192,7 +192,7 @@ public:
/// Requests that the platform unload any compiler resources.
void unload_compiler()
{
- #ifdef CL_VERSION_1_2
+ #ifdef BOOST_COMPUTE_CL_VERSION_1_2
clUnloadPlatformCompiler(m_platform);
#else
clUnloadCompiler();
diff --git a/boost/compute/program.hpp b/boost/compute/program.hpp
index 7573aa02e6..e953d7a89e 100644
--- a/boost/compute/program.hpp
+++ b/boost/compute/program.hpp
@@ -272,13 +272,15 @@ public:
}
}
- #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+ #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
/// Compiles the program with \p options.
///
/// \opencl_version_warning{1,2}
///
/// \see_opencl_ref{clCompileProgram}
- void compile(const std::string &options = std::string())
+ void compile(const std::string &options = std::string(),
+ const std::vector<std::pair<std::string, program> > &headers =
+ std::vector<std::pair<std::string, program> >())
{
const char *options_string = 0;
@@ -286,9 +288,36 @@ public:
options_string = options.c_str();
}
- cl_int ret = clCompileProgram(
- m_program, 0, 0, options_string, 0, 0, 0, 0, 0
- );
+ cl_int ret;
+ if (headers.empty())
+ {
+ ret = clCompileProgram(
+ m_program, 0, 0, options_string, 0, 0, 0, 0, 0
+ );
+ }
+ else
+ {
+ std::vector<const char*> header_names(headers.size());
+ std::vector<cl_program> header_programs(headers.size());
+ for (size_t i = 0; i < headers.size(); ++i)
+ {
+ header_names[i] = headers[i].first.c_str();
+ header_programs[i] = headers[i].second.m_program;
+ }
+
+ ret = clCompileProgram(
+ m_program,
+ 0,
+ 0,
+ options_string,
+ static_cast<cl_uint>(headers.size()),
+ header_programs.data(),
+ header_names.data(),
+ 0,
+ 0
+ );
+ }
+
if(ret != CL_SUCCESS){
BOOST_THROW_EXCEPTION(opencl_error(ret));
@@ -329,7 +358,7 @@ public:
return program(program_, false);
}
- #endif // CL_VERSION_1_2
+ #endif // BOOST_COMPUTE_CL_VERSION_1_2
/// Returns the build log.
std::string build_log() const
@@ -432,6 +461,33 @@ public:
return create_with_source(source, context);
}
+ /// Creates a new program in \p context from the source code stored in
+ /// each of the given \p files.
+ ///
+ /// Throws \c std::ios_base::failure if the stream for any of the files
+ /// is in a failed state right after it has been opened.
+ ///
+ /// \see_opencl_ref{clCreateProgramWithSource}
+ static program create_with_source_file(const std::vector<std::string> &files,
+ const context &context)
+ {
+ typedef std::istreambuf_iterator<char> char_iterator;
+
+ std::vector<std::string> sources;
+ sources.reserve(files.size());
+
+ for(std::vector<std::string>::const_iterator file = files.begin();
+ file != files.end();
+ ++file){
+ std::ifstream input(file->c_str());
+ if(input.fail()){
+ BOOST_THROW_EXCEPTION(std::ios_base::failure("failed to create stream."));
+ }
+
+ // read the complete file contents into one source string
+ sources.push_back(std::string(char_iterator(input), char_iterator()));
+ }
+
+ // build the program from all collected sources
+ return create_with_source(sources, context);
+ }
+
/// Creates a new program with \p binary of \p binary_size in
/// \p context.
///
@@ -489,7 +545,7 @@ public:
return create_with_binary(&binary[0], binary.size(), context);
}
- #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+ #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
/// Creates a new program with the built-in kernels listed in
/// \p kernel_names for \p devices in \p context.
///
@@ -516,7 +572,7 @@ public:
return program(program_, false);
}
- #endif // CL_VERSION_1_2
+ #endif // BOOST_COMPUTE_CL_VERSION_1_2
/// Create a new program with \p source in \p context and builds it with \p options.
/**
@@ -543,10 +599,11 @@ public:
.process( options )
.process( source )
;
+ std::string hash_string = hash;
// Try to get cached program binaries:
try {
- boost::optional<program> prog = load_program_binary(hash, context);
+ boost::optional<program> prog = load_program_binary(hash_string, context);
if (prog) {
prog->build(options);
@@ -575,7 +632,7 @@ public:
#ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE
// Save program binaries for future reuse.
- save_program_binary(hash, prog);
+ save_program_binary(hash_string, prog);
#endif
return prog;
@@ -637,12 +694,12 @@ BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(program,
((std::vector<unsigned char *>, CL_PROGRAM_BINARIES))
)
-#ifdef CL_VERSION_1_2
+#ifdef BOOST_COMPUTE_CL_VERSION_1_2
BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(program,
((size_t, CL_PROGRAM_NUM_KERNELS))
((std::string, CL_PROGRAM_KERNEL_NAMES))
)
-#endif // CL_VERSION_1_2
+#endif // BOOST_COMPUTE_CL_VERSION_1_2
} // end compute namespace
} // end boost namespace
diff --git a/boost/compute/svm.hpp b/boost/compute/svm.hpp
index 4bc3a74237..da493a9388 100644
--- a/boost/compute/svm.hpp
+++ b/boost/compute/svm.hpp
@@ -16,7 +16,7 @@
#include <boost/compute/memory/svm_ptr.hpp>
// svm functions require OpenCL 2.0
-#if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+#if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
namespace boost {
namespace compute {
@@ -67,6 +67,6 @@ inline void svm_free(const context &context, svm_ptr<T> ptr)
} // end compute namespace
} // end boost namespace
-#endif // CL_VERSION_2_0
+#endif // BOOST_COMPUTE_CL_VERSION_2_0
#endif // BOOST_COMPUTE_PIPE_HPP
diff --git a/boost/compute/system.hpp b/boost/compute/system.hpp
index e6ed353d68..5f29893b9d 100644
--- a/boost/compute/system.hpp
+++ b/boost/compute/system.hpp
@@ -55,6 +55,10 @@ public:
/// name of the platform (e.g. "NVIDIA CUDA")
/// \li \c BOOST_COMPUTE_DEFAULT_VENDOR -
/// name of the device vendor (e.g. "NVIDIA")
+ /// \li \c BOOST_COMPUTE_DEFAULT_ENFORCE -
+ /// If this is set to "1", then throw a no_device_found() exception
+ /// if any of the above environment variables is set, but a matching
+ /// device was not found.
///
/// The default device is determined once on the first time this function
/// is called. Calling this function multiple times will always result in
@@ -220,6 +224,7 @@ private:
const char *type = detail::getenv("BOOST_COMPUTE_DEFAULT_DEVICE_TYPE");
const char *platform = detail::getenv("BOOST_COMPUTE_DEFAULT_PLATFORM");
const char *vendor = detail::getenv("BOOST_COMPUTE_DEFAULT_VENDOR");
+ const char *enforce = detail::getenv("BOOST_COMPUTE_DEFAULT_ENFORCE");
if(name || type || platform || vendor){
for(size_t i = 0; i < devices_.size(); i++){
@@ -243,6 +248,9 @@ private:
return device;
}
+
+ if(enforce && enforce[0] == '1')
+ BOOST_THROW_EXCEPTION(no_device_found());
}
// find the first gpu device
diff --git a/boost/compute/type_traits/type_definition.hpp b/boost/compute/type_traits/type_definition.hpp
index de9095fbd2..3dcc4607fa 100644
--- a/boost/compute/type_traits/type_definition.hpp
+++ b/boost/compute/type_traits/type_definition.hpp
@@ -18,7 +18,10 @@ namespace compute {
namespace detail {
template<class T>
-struct type_definition_trait;
+struct type_definition_trait
+{
+ static std::string value() { return std::string(); }
+};
} // end detail namespace
diff --git a/boost/compute/types/tuple.hpp b/boost/compute/types/tuple.hpp
index 095bd95448..bc2971e49e 100644
--- a/boost/compute/types/tuple.hpp
+++ b/boost/compute/types/tuple.hpp
@@ -206,7 +206,7 @@ inline meta_kernel& operator<<(meta_kernel &kernel, \
typedef typename boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> T; \
BOOST_STATIC_ASSERT(N < size_t(boost::tuples::length<T>::value)); \
kernel.inject_type<T>(); \
- return kernel << expr.m_arg << ".v" << uint_(N); \
+ return kernel << expr.m_arg << ".v" << int_(N); \
}
BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_GET_N, ~)
diff --git a/boost/compute/user_event.hpp b/boost/compute/user_event.hpp
index a3fdba033e..6981b9c7f8 100644
--- a/boost/compute/user_event.hpp
+++ b/boost/compute/user_event.hpp
@@ -17,7 +17,7 @@
namespace boost {
namespace compute {
-#if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
+#if defined(BOOST_COMPUTE_CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
/// \class user_event
/// \brief An user-created event.
///
@@ -80,7 +80,7 @@ public:
}
}
};
-#endif // CL_VERSION_1_1
+#endif // BOOST_COMPUTE_CL_VERSION_1_1
} // end compute namespace
} // end boost namespace