diff options
author | DongHun Kwak <dh0128.kwak@samsung.com> | 2017-09-13 11:24:46 +0900 |
---|---|---|
committer | DongHun Kwak <dh0128.kwak@samsung.com> | 2017-09-13 11:25:39 +0900 |
commit | 4fadd968fa12130524c8380f33fcfe25d4de79e5 (patch) | |
tree | fd26a490cd15388d42fc6652b3c5c13012e7f93e /boost/compute | |
parent | b5c87084afaef42b2d058f68091be31988a6a874 (diff) | |
download | boost-4fadd968fa12130524c8380f33fcfe25d4de79e5.tar.gz boost-4fadd968fa12130524c8380f33fcfe25d4de79e5.tar.bz2 boost-4fadd968fa12130524c8380f33fcfe25d4de79e5.zip |
Imported Upstream version 1.65.0 (upstream/1.65.0)
Change-Id: Icf8400b375482cb11bcf77440a6934ba360d6ba4
Signed-off-by: DongHun Kwak <dh0128.kwak@samsung.com>
Diffstat (limited to 'boost/compute')
132 files changed, 956 insertions, 207 deletions
diff --git a/boost/compute/algorithm/accumulate.hpp b/boost/compute/algorithm/accumulate.hpp index 328420a07c..be20bee60e 100644 --- a/boost/compute/algorithm/accumulate.hpp +++ b/boost/compute/algorithm/accumulate.hpp @@ -26,6 +26,7 @@ namespace boost { namespace compute { namespace detail { +// Space complexity O(1) template<class InputIterator, class T, class BinaryFunction> inline T generic_accumulate(InputIterator first, InputIterator last, @@ -155,6 +156,9 @@ inline T dispatch_accumulate(InputIterator first, /// reduce(vec.begin(), vec.end(), &result, plus<float>()); // fast /// \endcode /// +/// Space complexity: \Omega(1)<br> +/// Space complexity when optimized to \c reduce(): \Omega(n) +/// /// \see reduce() template<class InputIterator, class T, class BinaryFunction> inline T accumulate(InputIterator first, diff --git a/boost/compute/algorithm/adjacent_difference.hpp b/boost/compute/algorithm/adjacent_difference.hpp index ef13970754..c3b0e7d191 100644 --- a/boost/compute/algorithm/adjacent_difference.hpp +++ b/boost/compute/algorithm/adjacent_difference.hpp @@ -64,6 +64,9 @@ dispatch_adjacent_difference(InputIterator first, /// /// \return \c OutputIterator to the end of the result range /// +/// Space complexity: \Omega(1)<br> +/// Space complexity when \p result == \p first: \Omega(n) +/// /// \see adjacent_find() template<class InputIterator, class OutputIterator, class BinaryFunction> inline OutputIterator diff --git a/boost/compute/algorithm/adjacent_find.hpp b/boost/compute/algorithm/adjacent_find.hpp index 992a01eddc..a71a817f57 100644 --- a/boost/compute/algorithm/adjacent_find.hpp +++ b/boost/compute/algorithm/adjacent_find.hpp @@ -114,6 +114,8 @@ adjacent_find_with_atomics(InputIterator first, /// \return \c InputIteratorm to the first element which compares equal /// to the following element. If none are equal, returns \c last. 
/// +/// Space complexity: \Omega(1) +/// /// \see find(), adjacent_difference() template<class InputIterator, class Compare> inline InputIterator diff --git a/boost/compute/algorithm/all_of.hpp b/boost/compute/algorithm/all_of.hpp index 34d7518f32..56c5809992 100644 --- a/boost/compute/algorithm/all_of.hpp +++ b/boost/compute/algorithm/all_of.hpp @@ -20,6 +20,8 @@ namespace compute { /// Returns \c true if \p predicate returns \c true for all of the elements in /// the range [\p first, \p last). /// +/// Space complexity: \Omega(1) +/// /// \see any_of(), none_of() template<class InputIterator, class UnaryPredicate> inline bool all_of(InputIterator first, diff --git a/boost/compute/algorithm/any_of.hpp b/boost/compute/algorithm/any_of.hpp index b07779597c..54031fbac5 100644 --- a/boost/compute/algorithm/any_of.hpp +++ b/boost/compute/algorithm/any_of.hpp @@ -24,6 +24,8 @@ namespace compute { /// /// \snippet test/test_any_all_none_of.cpp any_of /// +/// Space complexity: \Omega(1) +/// /// \see all_of(), none_of() template<class InputIterator, class UnaryPredicate> inline bool any_of(InputIterator first, diff --git a/boost/compute/algorithm/binary_search.hpp b/boost/compute/algorithm/binary_search.hpp index 6e19498790..880f3628fb 100644 --- a/boost/compute/algorithm/binary_search.hpp +++ b/boost/compute/algorithm/binary_search.hpp @@ -20,6 +20,8 @@ namespace compute { /// Returns \c true if \p value is in the sorted range [\p first, /// \p last). 
+/// +/// Space complexity: \Omega(1) template<class InputIterator, class T> inline bool binary_search(InputIterator first, InputIterator last, diff --git a/boost/compute/algorithm/copy.hpp b/boost/compute/algorithm/copy.hpp index 7779277b82..4866726f6e 100644 --- a/boost/compute/algorithm/copy.hpp +++ b/boost/compute/algorithm/copy.hpp @@ -826,6 +826,8 @@ dispatch_copy(InputIterator first, /// ); /// \endcode /// +/// Space complexity: \Omega(1) +/// /// \see copy_n(), copy_if(), copy_async() template<class InputIterator, class OutputIterator> inline OutputIterator copy(InputIterator first, diff --git a/boost/compute/algorithm/copy_if.hpp b/boost/compute/algorithm/copy_if.hpp index 3cd08ef293..bdedcb8536 100644 --- a/boost/compute/algorithm/copy_if.hpp +++ b/boost/compute/algorithm/copy_if.hpp @@ -38,6 +38,8 @@ inline OutputIterator copy_index_if(InputIterator first, /// Copies each element in the range [\p first, \p last) for which /// \p predicate returns \c true to the range beginning at \p result. 
+/// +/// Space complexity: \Omega(2n) template<class InputIterator, class OutputIterator, class Predicate> inline OutputIterator copy_if(InputIterator first, InputIterator last, diff --git a/boost/compute/algorithm/copy_n.hpp b/boost/compute/algorithm/copy_n.hpp index f0989edc67..5280193497 100644 --- a/boost/compute/algorithm/copy_n.hpp +++ b/boost/compute/algorithm/copy_n.hpp @@ -30,6 +30,8 @@ namespace compute { /// boost::compute::copy_n(values, 4, vec.begin(), queue); /// \endcode /// +/// Space complexity: \Omega(1) +/// /// \see copy() template<class InputIterator, class Size, class OutputIterator> inline OutputIterator copy_n(InputIterator first, diff --git a/boost/compute/algorithm/count.hpp b/boost/compute/algorithm/count.hpp index 140d67379f..7a502c6791 100644 --- a/boost/compute/algorithm/count.hpp +++ b/boost/compute/algorithm/count.hpp @@ -23,6 +23,9 @@ namespace compute { /// Returns the number of occurrences of \p value in the range /// [\p first, \p last). /// +/// Space complexity on CPUs: \Omega(1)<br> +/// Space complexity on GPUs: \Omega(n) +/// /// \see count_if() template<class InputIterator, class T> inline size_t count(InputIterator first, diff --git a/boost/compute/algorithm/count_if.hpp b/boost/compute/algorithm/count_if.hpp index c9381ce5d4..81996dc828 100644 --- a/boost/compute/algorithm/count_if.hpp +++ b/boost/compute/algorithm/count_if.hpp @@ -25,6 +25,9 @@ namespace compute { /// Returns the number of elements in the range [\p first, \p last) /// for which \p predicate returns \c true. 
+/// +/// Space complexity on CPUs: \Omega(1)<br> +/// Space complexity on GPUs: \Omega(n) template<class InputIterator, class Predicate> inline size_t count_if(InputIterator first, InputIterator last, diff --git a/boost/compute/algorithm/detail/copy_on_device.hpp b/boost/compute/algorithm/detail/copy_on_device.hpp index 8738c8c0b4..034b3bc212 100644 --- a/boost/compute/algorithm/detail/copy_on_device.hpp +++ b/boost/compute/algorithm/detail/copy_on_device.hpp @@ -144,7 +144,7 @@ inline future<OutputIterator> copy_on_device_async(InputIterator first, return make_future(result + std::distance(first, last), event_); } -#ifdef CL_VERSION_2_0 +#ifdef BOOST_COMPUTE_CL_VERSION_2_0 // copy_on_device() specialization for svm_ptr template<class T> inline svm_ptr<T> copy_on_device(svm_ptr<T> first, @@ -181,7 +181,7 @@ inline future<svm_ptr<T> > copy_on_device_async(svm_ptr<T> first, return make_future(result + count, event_); } -#endif // CL_VERSION_2_0 +#endif // BOOST_COMPUTE_CL_VERSION_2_0 } // end detail namespace } // end compute namespace diff --git a/boost/compute/algorithm/detail/copy_to_device.hpp b/boost/compute/algorithm/detail/copy_to_device.hpp index bce5975f53..8601bb20ec 100644 --- a/boost/compute/algorithm/detail/copy_to_device.hpp +++ b/boost/compute/algorithm/detail/copy_to_device.hpp @@ -124,7 +124,7 @@ inline future<DeviceIterator> copy_to_device_async(HostIterator first, return make_future(result + static_cast<difference_type>(count), event_); } -#ifdef CL_VERSION_2_0 +#ifdef BOOST_COMPUTE_CL_VERSION_2_0 // copy_to_device() specialization for svm_ptr template<class HostIterator, class T> inline svm_ptr<T> copy_to_device(HostIterator first, @@ -184,7 +184,7 @@ inline svm_ptr<T> copy_to_device_map(HostIterator first, return result + count; } -#endif // CL_VERSION_2_0 +#endif // BOOST_COMPUTE_CL_VERSION_2_0 } // end detail namespace } // end compute namespace diff --git a/boost/compute/algorithm/detail/copy_to_host.hpp 
b/boost/compute/algorithm/detail/copy_to_host.hpp index d770a996ef..89b57174fa 100644 --- a/boost/compute/algorithm/detail/copy_to_host.hpp +++ b/boost/compute/algorithm/detail/copy_to_host.hpp @@ -125,7 +125,7 @@ inline future<HostIterator> copy_to_host_async(DeviceIterator first, return make_future(iterator_plus_distance(result, count), event_); } -#ifdef CL_VERSION_2_0 +#ifdef BOOST_COMPUTE_CL_VERSION_2_0 // copy_to_host() specialization for svm_ptr template<class T, class HostIterator> inline HostIterator copy_to_host(svm_ptr<T> first, @@ -189,7 +189,7 @@ inline HostIterator copy_to_host_map(svm_ptr<T> first, return iterator_plus_distance(result, count); } -#endif // CL_VERSION_2_0 +#endif // BOOST_COMPUTE_CL_VERSION_2_0 } // end detail namespace } // end compute namespace diff --git a/boost/compute/algorithm/detail/find_extrema.hpp b/boost/compute/algorithm/detail/find_extrema.hpp index eef2e36c3c..734b75aa90 100644 --- a/boost/compute/algorithm/detail/find_extrema.hpp +++ b/boost/compute/algorithm/detail/find_extrema.hpp @@ -56,7 +56,7 @@ inline InputIterator find_extrema(InputIterator first, // use serial method for OpenCL version 1.0 due to // problems with atomic_cmpxchg() - #ifndef CL_VERSION_1_1 + #ifndef BOOST_COMPUTE_CL_VERSION_1_1 return serial_find_extrema(first, last, compare, find_minimum, queue); #endif diff --git a/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp b/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp index 8f2a83c38b..515d7cc6da 100644 --- a/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp +++ b/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp @@ -246,6 +246,7 @@ inline void find_extrema_with_reduce(InputIterator input, ); } +// Space complexity: \Omega(2 * work-group-size * work-groups-per-compute-unit) template<class InputIterator, class Compare> InputIterator find_extrema_with_reduce(InputIterator first, InputIterator last, diff --git 
a/boost/compute/algorithm/detail/find_if_with_atomics.hpp b/boost/compute/algorithm/detail/find_if_with_atomics.hpp index 112c34cf00..e14fd12ae0 100644 --- a/boost/compute/algorithm/detail/find_if_with_atomics.hpp +++ b/boost/compute/algorithm/detail/find_if_with_atomics.hpp @@ -153,6 +153,7 @@ inline InputIterator find_if_with_atomics_multiple_vpt(InputIterator first, return first + static_cast<difference_type>(index.read(queue)); } +// Space complexity: O(1) template<class InputIterator, class UnaryPredicate> inline InputIterator find_if_with_atomics(InputIterator first, InputIterator last, diff --git a/boost/compute/algorithm/detail/merge_sort_on_gpu.hpp b/boost/compute/algorithm/detail/merge_sort_on_gpu.hpp index e62c6beb8d..d5e1a2d8c9 100644 --- a/boost/compute/algorithm/detail/merge_sort_on_gpu.hpp +++ b/boost/compute/algorithm/detail/merge_sort_on_gpu.hpp @@ -91,6 +91,7 @@ inline size_t bitonic_block_sort(KeyIterator keys_first, command_queue &queue) { typedef typename std::iterator_traits<KeyIterator>::value_type key_type; + typedef typename std::iterator_traits<ValueIterator>::value_type value_type; meta_kernel k("bitonic_block_sort"); size_t count_arg = k.add_arg<const uint_>("count"); @@ -249,8 +250,11 @@ inline size_t bitonic_block_sort(KeyIterator keys_first, k.var<key_type>("my_key") << ";\n"; if(sort_by_key) { - k << values_first[k.var<const uint_>("gid")] << " = " << - values_first[k.var<const uint_>("offset + my_index")] << ";\n"; + k << + k.decl<value_type>("my_value") << " = " << + values_first[k.var<const uint_>("offset + my_index")] << ";\n" << + "barrier(CLK_GLOBAL_MEM_FENCE);\n" << + values_first[k.var<const uint_>("gid")] << " = my_value;\n"; } k << // end if @@ -418,7 +422,7 @@ inline void merge_blocks_on_gpu(KeyIterator keys_first, ");\n" << "left_idx = equal ? mid_idx + 1 : left_idx + 1;\n" << "right_idx = equal ? right_idx : mid_idx;\n" << - "upper_key = equal ? 
upper_key : " << + "upper_key = " << keys_first[k.var<const uint_>("left_idx")] << ";\n" << "}\n" << "}\n" << diff --git a/boost/compute/algorithm/detail/radix_sort.hpp b/boost/compute/algorithm/detail/radix_sort.hpp index 8e6d5f9c0a..53b1205c70 100644 --- a/boost/compute/algorithm/detail/radix_sort.hpp +++ b/boost/compute/algorithm/detail/radix_sort.hpp @@ -17,6 +17,9 @@ #include <boost/type_traits/is_signed.hpp> #include <boost/type_traits/is_floating_point.hpp> +#include <boost/mpl/and.hpp> +#include <boost/mpl/not.hpp> + #include <boost/compute/kernel.hpp> #include <boost/compute/program.hpp> #include <boost/compute/command_queue.hpp> @@ -305,9 +308,12 @@ inline void radix_sort_impl(const buffer_iterator<T> first, options << " -DASC"; } + // get type definition if it is a custom struct + std::string custom_type_def = boost::compute::type_definition<T2>() + "\n"; + // load radix sort program program radix_sort_program = cache->get_or_build( - cache_key, options.str(), radix_sort_source, context + cache_key, options.str(), custom_type_def + radix_sort_source, context ); kernel count_kernel(radix_sort_program, "count"); diff --git a/boost/compute/algorithm/detail/serial_reduce.hpp b/boost/compute/algorithm/detail/serial_reduce.hpp index 53aaf140fe..8b121274b9 100644 --- a/boost/compute/algorithm/detail/serial_reduce.hpp +++ b/boost/compute/algorithm/detail/serial_reduce.hpp @@ -20,6 +20,7 @@ namespace boost { namespace compute { namespace detail { +// Space complexity: O(1) template<class InputIterator, class OutputIterator, class BinaryFunction> inline void serial_reduce(InputIterator first, InputIterator last, diff --git a/boost/compute/algorithm/detail/serial_reduce_by_key.hpp b/boost/compute/algorithm/detail/serial_reduce_by_key.hpp index f9bda8e476..6fb04baa6d 100644 --- a/boost/compute/algorithm/detail/serial_reduce_by_key.hpp +++ b/boost/compute/algorithm/detail/serial_reduce_by_key.hpp @@ -55,11 +55,9 @@ inline size_t serial_reduce_by_key(InputKeyIterator 
keys_first, size_t result_size_arg = k.add_arg<uint_ *>(memory_object::global_memory, "result_size"); - convert<result_type> to_result_type; - k << k.decl<result_type>("result") << - " = " << to_result_type(values_first[0]) << ";\n" << + " = " << values_first[0] << ";\n" << k.decl<key_type>("previous_key") << " = " << keys_first[0] << ";\n" << k.decl<result_type>("value") << ";\n" << k.decl<key_type>("key") << ";\n" << @@ -70,7 +68,7 @@ inline size_t serial_reduce_by_key(InputKeyIterator keys_first, values_result[0] << " = result;\n" << "for(ulong i = 1; i < count; i++) {\n" << - " value = " << to_result_type(values_first[k.var<uint_>("i")]) << ";\n" << + " value = " << values_first[k.var<uint_>("i")] << ";\n" << " key = " << keys_first[k.var<uint_>("i")] << ";\n" << " if (" << predicate(k.var<key_type>("previous_key"), k.var<key_type>("key")) << ") {\n" << diff --git a/boost/compute/algorithm/equal.hpp b/boost/compute/algorithm/equal.hpp index 35d0c5f0ea..c3c8053b71 100644 --- a/boost/compute/algorithm/equal.hpp +++ b/boost/compute/algorithm/equal.hpp @@ -20,6 +20,8 @@ namespace compute { /// Returns \c true if the range [\p first1, \p last1) and the range /// beginning at \p first2 are equal. +/// +/// Space complexity: \Omega(1) template<class InputIterator1, class InputIterator2> inline bool equal(InputIterator1 first1, InputIterator1 last1, diff --git a/boost/compute/algorithm/equal_range.hpp b/boost/compute/algorithm/equal_range.hpp index fd82177324..d7008e3cf4 100644 --- a/boost/compute/algorithm/equal_range.hpp +++ b/boost/compute/algorithm/equal_range.hpp @@ -23,6 +23,8 @@ namespace compute { /// Returns a pair of iterators containing the range of values equal /// to \p value in the sorted range [\p first, \p last). 
+/// +/// Space complexity: \Omega(1) template<class InputIterator, class T> inline std::pair<InputIterator, InputIterator> equal_range(InputIterator first, diff --git a/boost/compute/algorithm/exclusive_scan.hpp b/boost/compute/algorithm/exclusive_scan.hpp index 205d3de658..806a172cf4 100644 --- a/boost/compute/algorithm/exclusive_scan.hpp +++ b/boost/compute/algorithm/exclusive_scan.hpp @@ -44,6 +44,10 @@ namespace compute { /// /// \snippet test/test_scan.cpp exclusive_scan_int_multiplies /// +/// Space complexity on GPUs: \Omega(n)<br> +/// Space complexity on GPUs when \p first == \p result: \Omega(2n)<br> +/// Space complexity on CPUs: \Omega(1) +/// /// \see inclusive_scan() template<class InputIterator, class OutputIterator, class T, class BinaryOperator> inline OutputIterator diff --git a/boost/compute/algorithm/fill.hpp b/boost/compute/algorithm/fill.hpp index c711f46b94..646d8acda4 100644 --- a/boost/compute/algorithm/fill.hpp +++ b/boost/compute/algorithm/fill.hpp @@ -64,7 +64,7 @@ inline future<void> fill_async_with_copy(BufferIterator first, ); } -#if defined(CL_VERSION_1_2) +#if defined(BOOST_COMPUTE_CL_VERSION_1_2) // meta-function returing true if Iterator points to a range of values // that can be filled using clEnqueueFillBuffer(). 
to meet this criteria @@ -172,7 +172,7 @@ dispatch_fill_async(BufferIterator first, return future<void>(event_); } -#ifdef CL_VERSION_2_0 +#ifdef BOOST_COMPUTE_CL_VERSION_2_0 // specializations for svm_ptr<T> template<class T> inline void dispatch_fill(svm_ptr<T> first, @@ -205,7 +205,7 @@ inline future<void> dispatch_fill_async(svm_ptr<T> first, return future<void>(event_); } -#endif // CL_VERSION_2_0 +#endif // BOOST_COMPUTE_CL_VERSION_2_0 // default implementations template<class BufferIterator, class T> @@ -251,7 +251,7 @@ inline future<void> dispatch_fill_async(BufferIterator first, { return fill_async_with_copy(first, count, value, queue); } -#endif // !defined(CL_VERSION_1_2) +#endif // !defined(BOOST_COMPUTE_CL_VERSION_1_2) } // end detail namespace @@ -271,6 +271,8 @@ inline future<void> dispatch_fill_async(BufferIterator first, /// boost::compute::fill(vec.begin(), vec.end(), 7, queue); /// \endcode /// +/// Space complexity: \Omega(1) +/// /// \see boost::compute::fill_n() template<class BufferIterator, class T> inline void fill(BufferIterator first, diff --git a/boost/compute/algorithm/fill_n.hpp b/boost/compute/algorithm/fill_n.hpp index 18a8f706a5..6be2d280a6 100644 --- a/boost/compute/algorithm/fill_n.hpp +++ b/boost/compute/algorithm/fill_n.hpp @@ -20,6 +20,8 @@ namespace compute { /// Fills the range [\p first, \p first + count) with \p value. /// +/// Space complexity: \Omega(1) +/// /// \see fill() template<class BufferIterator, class Size, class T> inline void fill_n(BufferIterator first, diff --git a/boost/compute/algorithm/find.hpp b/boost/compute/algorithm/find.hpp index ef3ebf0c47..a6225b8c99 100644 --- a/boost/compute/algorithm/find.hpp +++ b/boost/compute/algorithm/find.hpp @@ -22,6 +22,8 @@ namespace compute { /// Returns an iterator pointing to the first element in the range /// [\p first, \p last) that equals \p value. 
+/// +/// Space complexity: \Omega(1) template<class InputIterator, class T> inline InputIterator find(InputIterator first, InputIterator last, diff --git a/boost/compute/algorithm/find_end.hpp b/boost/compute/algorithm/find_end.hpp index 265a1da542..a0a1b2e8c9 100644 --- a/boost/compute/algorithm/find_end.hpp +++ b/boost/compute/algorithm/find_end.hpp @@ -26,8 +26,8 @@ namespace detail { /// /// \brief Helper function for find_end /// -/// Basically a copy of find_if which returns last occurence -/// instead of first occurence +/// Basically a copy of find_if which returns last occurrence +/// instead of first occurrence /// template<class InputIterator, class UnaryPredicate> inline InputIterator find_end_helper(InputIterator first, @@ -90,6 +90,8 @@ inline InputIterator find_end_helper(InputIterator first, /// \param p_last Iterator pointing to end of pattern /// \param queue Queue on which to execute /// +/// Space complexity: \Omega(n) +/// template<class TextIterator, class PatternIterator> inline TextIterator find_end(TextIterator t_first, TextIterator t_last, diff --git a/boost/compute/algorithm/find_if.hpp b/boost/compute/algorithm/find_if.hpp index db99cc0396..074b47e280 100644 --- a/boost/compute/algorithm/find_if.hpp +++ b/boost/compute/algorithm/find_if.hpp @@ -20,6 +20,8 @@ namespace compute { /// Returns an iterator pointing to the first element in the range /// [\p first, \p last) for which \p predicate returns \c true. 
+/// +/// Space complexity: \Omega(1) template<class InputIterator, class UnaryPredicate> inline InputIterator find_if(InputIterator first, InputIterator last, diff --git a/boost/compute/algorithm/find_if_not.hpp b/boost/compute/algorithm/find_if_not.hpp index 61de050d31..a008a99469 100644 --- a/boost/compute/algorithm/find_if_not.hpp +++ b/boost/compute/algorithm/find_if_not.hpp @@ -22,6 +22,8 @@ namespace compute { /// Returns an iterator pointing to the first element in the range /// [\p first, \p last) for which \p predicate returns \c false. /// +/// Space complexity: \Omega(1) +/// /// \see find_if() template<class InputIterator, class UnaryPredicate> inline InputIterator find_if_not(InputIterator first, diff --git a/boost/compute/algorithm/for_each.hpp b/boost/compute/algorithm/for_each.hpp index 3ed399e6e9..7afba2b5f5 100644 --- a/boost/compute/algorithm/for_each.hpp +++ b/boost/compute/algorithm/for_each.hpp @@ -45,6 +45,8 @@ struct for_each_kernel : public meta_kernel /// Calls \p function on each element in the range [\p first, \p last). /// +/// Space complexity: \Omega(1) +/// /// \see transform() template<class InputIterator, class UnaryFunction> inline UnaryFunction for_each(InputIterator first, diff --git a/boost/compute/algorithm/for_each_n.hpp b/boost/compute/algorithm/for_each_n.hpp index d0be784bf7..77932ab209 100644 --- a/boost/compute/algorithm/for_each_n.hpp +++ b/boost/compute/algorithm/for_each_n.hpp @@ -19,6 +19,8 @@ namespace compute { /// Calls \p function on each element in the range [\p first, \p first /// \c + \p count). 
/// +/// Space complexity: \Omega(1) +/// /// \see for_each() template<class InputIterator, class Size, class UnaryFunction> inline UnaryFunction for_each_n(InputIterator first, diff --git a/boost/compute/algorithm/gather.hpp b/boost/compute/algorithm/gather.hpp index 24c5c727ae..62442587f7 100644 --- a/boost/compute/algorithm/gather.hpp +++ b/boost/compute/algorithm/gather.hpp @@ -62,6 +62,8 @@ private: /// to the range beginning at \p result using the input values from the range /// beginning at \p input. /// +/// Space complexity: \Omega(1) +/// /// \see scatter() template<class InputIterator, class MapIterator, class OutputIterator> inline void gather(MapIterator first, diff --git a/boost/compute/algorithm/generate.hpp b/boost/compute/algorithm/generate.hpp index c70a542683..9ac76a3dca 100644 --- a/boost/compute/algorithm/generate.hpp +++ b/boost/compute/algorithm/generate.hpp @@ -22,6 +22,8 @@ namespace compute { /// Stores the result of \p generator for each element in the range /// [\p first, \p last). +/// +/// Space complexity: \Omega(1) template<class OutputIterator, class Generator> inline void generate(OutputIterator first, OutputIterator last, diff --git a/boost/compute/algorithm/generate_n.hpp b/boost/compute/algorithm/generate_n.hpp index 6d8e607b64..066a831ddf 100644 --- a/boost/compute/algorithm/generate_n.hpp +++ b/boost/compute/algorithm/generate_n.hpp @@ -20,6 +20,8 @@ namespace compute { /// Stores the result of \p generator for each element in the range /// [\p first, \p first + \p count). 
+/// +/// Space complexity: \Omega(1) template<class OutputIterator, class Size, class Generator> inline void generate_n(OutputIterator first, Size count, diff --git a/boost/compute/algorithm/includes.hpp b/boost/compute/algorithm/includes.hpp index c4e7c793e7..cfef9540a7 100644 --- a/boost/compute/algorithm/includes.hpp +++ b/boost/compute/algorithm/includes.hpp @@ -110,6 +110,7 @@ private: /// \param last2 Iterator pointing to end of second set /// \param queue Queue on which to execute /// +/// Space complexity: \Omega(distance(\p first1, \p last1) + distance(\p first2, \p last2)) template<class InputIterator1, class InputIterator2> inline bool includes(InputIterator1 first1, InputIterator1 last1, diff --git a/boost/compute/algorithm/inclusive_scan.hpp b/boost/compute/algorithm/inclusive_scan.hpp index 9f98beaf7c..84f1b8cbf7 100644 --- a/boost/compute/algorithm/inclusive_scan.hpp +++ b/boost/compute/algorithm/inclusive_scan.hpp @@ -42,6 +42,10 @@ namespace compute { /// /// \snippet test/test_scan.cpp inclusive_scan_int_multiplies /// +/// Space complexity on GPUs: \Omega(n)<br> +/// Space complexity on GPUs when \p first == \p result: \Omega(2n)<br> +/// Space complexity on CPUs: \Omega(1) +/// /// \see exclusive_scan() template<class InputIterator, class OutputIterator, class BinaryOperator> inline OutputIterator diff --git a/boost/compute/algorithm/inner_product.hpp b/boost/compute/algorithm/inner_product.hpp index 614611f91e..0aeaf9110e 100644 --- a/boost/compute/algorithm/inner_product.hpp +++ b/boost/compute/algorithm/inner_product.hpp @@ -26,6 +26,9 @@ namespace compute { /// Returns the inner product of the elements in the range /// [\p first1, \p last1) with the elements in the range beginning /// at \p first2. 
+/// +/// Space complexity: \Omega(1)<br> +/// Space complexity when binary operator is recognized as associative: \Omega(n) template<class InputIterator1, class InputIterator2, class T> inline T inner_product(InputIterator1 first1, InputIterator1 last1, diff --git a/boost/compute/algorithm/inplace_merge.hpp b/boost/compute/algorithm/inplace_merge.hpp index 3080950df5..91f5be5335 100644 --- a/boost/compute/algorithm/inplace_merge.hpp +++ b/boost/compute/algorithm/inplace_merge.hpp @@ -23,6 +23,8 @@ namespace compute { /// Merges the sorted values in the range [\p first, \p middle) with /// the sorted values in the range [\p middle, \p last) in-place. +/// +/// Space complexity: \Omega(n) template<class Iterator> inline void inplace_merge(Iterator first, Iterator middle, diff --git a/boost/compute/algorithm/iota.hpp b/boost/compute/algorithm/iota.hpp index 084c3d8d97..4cd7aa9c7b 100644 --- a/boost/compute/algorithm/iota.hpp +++ b/boost/compute/algorithm/iota.hpp @@ -26,6 +26,8 @@ namespace compute { /// \snippet test/test_iota.cpp iota /// /// Will fill \c vec with the values (\c 0, \c 1, \c 2, \c ...). +/// +/// Space complexity: \Omega(1) template<class BufferIterator, class T> inline void iota(BufferIterator first, BufferIterator last, diff --git a/boost/compute/algorithm/is_partitioned.hpp b/boost/compute/algorithm/is_partitioned.hpp index 3916825057..6ad24f240f 100644 --- a/boost/compute/algorithm/is_partitioned.hpp +++ b/boost/compute/algorithm/is_partitioned.hpp @@ -21,6 +21,8 @@ namespace compute { /// Returns \c true if the values in the range [\p first, \p last) /// are partitioned according to \p predicate. 
+/// +/// Space complexity: \Omega(1) template<class InputIterator, class UnaryPredicate> inline bool is_partitioned(InputIterator first, InputIterator last, diff --git a/boost/compute/algorithm/is_permutation.hpp b/boost/compute/algorithm/is_permutation.hpp index 1e502efb37..88b89b7973 100644 --- a/boost/compute/algorithm/is_permutation.hpp +++ b/boost/compute/algorithm/is_permutation.hpp @@ -36,6 +36,7 @@ namespace compute { /// \param last2 Iterator pointing to end of second range /// \param queue Queue on which to execute /// +/// Space complexity: \Omega(distance(\p first1, \p last1) + distance(\p first2, \p last2)) template<class InputIterator1, class InputIterator2> inline bool is_permutation(InputIterator1 first1, InputIterator1 last1, diff --git a/boost/compute/algorithm/is_sorted.hpp b/boost/compute/algorithm/is_sorted.hpp index a605159ac3..7441620978 100644 --- a/boost/compute/algorithm/is_sorted.hpp +++ b/boost/compute/algorithm/is_sorted.hpp @@ -30,6 +30,8 @@ namespace compute { /// /// \return \c true if the range [\p first, \p last) is sorted /// +/// Space complexity: \Omega(1) +/// /// \see sort() template<class InputIterator, class Compare> inline bool is_sorted(InputIterator first, diff --git a/boost/compute/algorithm/lexicographical_compare.hpp b/boost/compute/algorithm/lexicographical_compare.hpp index c4f7120807..952e678a68 100644 --- a/boost/compute/algorithm/lexicographical_compare.hpp +++ b/boost/compute/algorithm/lexicographical_compare.hpp @@ -42,10 +42,10 @@ const char lexicographical_compare_source[] = template<class InputIterator1, class InputIterator2> inline bool dispatch_lexicographical_compare(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - command_queue &queue) + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + command_queue &queue) { const boost::compute::context &context = queue.get_context(); @@ -103,6 +103,9 @@ inline bool 
dispatch_lexicographical_compare(InputIterator1 first1, /// Checks if the first range [first1, last1) is lexicographically /// less than the second range [first2, last2). +/// +/// Space complexity: +/// \Omega(max(distance(\p first1, \p last1), distance(\p first2, \p last2))) template<class InputIterator1, class InputIterator2> inline bool lexicographical_compare(InputIterator1 first1, InputIterator1 last1, diff --git a/boost/compute/algorithm/lower_bound.hpp b/boost/compute/algorithm/lower_bound.hpp index b2011c66ef..f78bbd6364 100644 --- a/boost/compute/algorithm/lower_bound.hpp +++ b/boost/compute/algorithm/lower_bound.hpp @@ -22,6 +22,8 @@ namespace compute { /// Returns an iterator pointing to the first element in the sorted /// range [\p first, \p last) that is not less than \p value. /// +/// Space complexity: \Omega(1) +/// /// \see upper_bound() template<class InputIterator, class T> inline InputIterator diff --git a/boost/compute/algorithm/max_element.hpp b/boost/compute/algorithm/max_element.hpp index 55f2f7ffbf..f9df37420c 100644 --- a/boost/compute/algorithm/max_element.hpp +++ b/boost/compute/algorithm/max_element.hpp @@ -43,6 +43,9 @@ namespace compute { /// boost::compute::max_element(data.begin(), data.end(), compare_first, queue); /// \endcode /// +/// Space complexity on CPUs: \Omega(1)<br> +/// Space complexity on GPUs: \Omega(N) +/// /// \see min_element() template<class InputIterator, class Compare> inline InputIterator diff --git a/boost/compute/algorithm/merge.hpp b/boost/compute/algorithm/merge.hpp index 875a283044..ff3e6e879a 100644 --- a/boost/compute/algorithm/merge.hpp +++ b/boost/compute/algorithm/merge.hpp @@ -37,6 +37,8 @@ namespace compute { /// /// \return \c OutputIterator to the end of the result range /// +/// Space complexity: \Omega(distance(\p first1, \p last1) + distance(\p first2, \p last2)) +/// /// \see inplace_merge() template<class InputIterator1, class InputIterator2, diff --git 
a/boost/compute/algorithm/min_element.hpp b/boost/compute/algorithm/min_element.hpp index 62744efb98..b52e2670cb 100644 --- a/boost/compute/algorithm/min_element.hpp +++ b/boost/compute/algorithm/min_element.hpp @@ -43,6 +43,9 @@ namespace compute { /// boost::compute::min_element(data.begin(), data.end(), compare_first, queue); /// \endcode /// +/// Space complexity on CPUs: \Omega(1)<br> +/// Space complexity on GPUs: \Omega(N) +/// /// \see max_element() template<class InputIterator, class Compare> inline InputIterator diff --git a/boost/compute/algorithm/minmax_element.hpp b/boost/compute/algorithm/minmax_element.hpp index 3f44c09eaf..4b2aae6dee 100644 --- a/boost/compute/algorithm/minmax_element.hpp +++ b/boost/compute/algorithm/minmax_element.hpp @@ -31,6 +31,9 @@ namespace compute { /// argument is less than (i.e. is ordered before) the second. /// \param queue command queue to perform the operation /// +/// Space complexity on CPUs: \Omega(1)<br> +/// Space complexity on GPUs: \Omega(N) +/// /// \see max_element(), min_element() template<class InputIterator, class Compare> inline std::pair<InputIterator, InputIterator> diff --git a/boost/compute/algorithm/mismatch.hpp b/boost/compute/algorithm/mismatch.hpp index e7db883004..ff31f49f97 100644 --- a/boost/compute/algorithm/mismatch.hpp +++ b/boost/compute/algorithm/mismatch.hpp @@ -28,6 +28,8 @@ namespace compute { /// Returns a pair of iterators pointing to the first position where the /// range [\p first1, \p last1) and the range starting at \p first2 /// differ. 
+/// +/// Space complexity: \Omega(1) template<class InputIterator1, class InputIterator2> inline std::pair<InputIterator1, InputIterator2> mismatch(InputIterator1 first1, diff --git a/boost/compute/algorithm/next_permutation.hpp b/boost/compute/algorithm/next_permutation.hpp index e81fbd2ee8..061ea1efe9 100644 --- a/boost/compute/algorithm/next_permutation.hpp +++ b/boost/compute/algorithm/next_permutation.hpp @@ -131,6 +131,7 @@ inline InputIterator np_ceiling(InputIterator first, /// \param last Iterator pointing to end of range /// \param queue Queue on which to execute /// +/// Space complexity: \Omega(1) template<class InputIterator> inline bool next_permutation(InputIterator first, InputIterator last, diff --git a/boost/compute/algorithm/none_of.hpp b/boost/compute/algorithm/none_of.hpp index c25dd12a87..fc3ed94bc1 100644 --- a/boost/compute/algorithm/none_of.hpp +++ b/boost/compute/algorithm/none_of.hpp @@ -20,6 +20,8 @@ namespace compute { /// Returns \c true if \p predicate returns \c true for none of the elements in /// the range [\p first, \p last). /// +/// Space complexity: \Omega(1) +/// /// \see all_of(), any_of() template<class InputIterator, class UnaryPredicate> inline bool none_of(InputIterator first, diff --git a/boost/compute/algorithm/nth_element.hpp b/boost/compute/algorithm/nth_element.hpp index 68f7a3dbc0..93344271dd 100644 --- a/boost/compute/algorithm/nth_element.hpp +++ b/boost/compute/algorithm/nth_element.hpp @@ -23,6 +23,8 @@ namespace compute { /// Rearranges the elements in the range [\p first, \p last) such that /// the \p nth element would be in that position in a sorted sequence. 
+/// +/// Space complexity: \Omega(3n) template<class Iterator, class Compare> inline void nth_element(Iterator first, Iterator nth, diff --git a/boost/compute/algorithm/partial_sum.hpp b/boost/compute/algorithm/partial_sum.hpp index d440369a5a..53d36a9db0 100644 --- a/boost/compute/algorithm/partial_sum.hpp +++ b/boost/compute/algorithm/partial_sum.hpp @@ -21,6 +21,10 @@ namespace compute { /// Calculates the cumulative sum of the elements in the range [\p first, /// \p last) and writes the resulting values to the range beginning at /// \p result. +/// +/// Space complexity on GPUs: \Omega(n)<br> +/// Space complexity on GPUs when \p first == \p result: \Omega(2n)<br> +/// Space complexity on CPUs: \Omega(1) template<class InputIterator, class OutputIterator> inline OutputIterator partial_sum(InputIterator first, diff --git a/boost/compute/algorithm/partition.hpp b/boost/compute/algorithm/partition.hpp index 7860350e0d..59d0c78f7e 100644 --- a/boost/compute/algorithm/partition.hpp +++ b/boost/compute/algorithm/partition.hpp @@ -22,6 +22,8 @@ namespace compute { /// Partitions the elements in the range [\p first, \p last) according to /// \p predicate. Order of the elements need not be preserved. /// +/// Space complexity: \Omega(3n) +/// /// \see is_partitioned() and stable_partition() /// template<class Iterator, class UnaryPredicate> diff --git a/boost/compute/algorithm/partition_copy.hpp b/boost/compute/algorithm/partition_copy.hpp index 80a2c6475f..3215ec0736 100644 --- a/boost/compute/algorithm/partition_copy.hpp +++ b/boost/compute/algorithm/partition_copy.hpp @@ -24,6 +24,8 @@ namespace compute { /// and all of the elements for which \p predicate returns \c false to /// the range beginning at \p first_false. 
/// +/// Space complexity: \Omega(2n) +/// /// \see partition() template<class InputIterator, class OutputIterator1, diff --git a/boost/compute/algorithm/partition_point.hpp b/boost/compute/algorithm/partition_point.hpp index 3cc2bc0ca6..748824512d 100644 --- a/boost/compute/algorithm/partition_point.hpp +++ b/boost/compute/algorithm/partition_point.hpp @@ -29,6 +29,8 @@ namespace compute { /// \param predicate Unary predicate to be applied on each element /// \param queue Queue on which to execute /// +/// Space complexity: \Omega(1) +/// /// \see partition() and stable_partition() /// template<class InputIterator, class UnaryPredicate> diff --git a/boost/compute/algorithm/prev_permutation.hpp b/boost/compute/algorithm/prev_permutation.hpp index 03c01bf8f4..ea20835caa 100644 --- a/boost/compute/algorithm/prev_permutation.hpp +++ b/boost/compute/algorithm/prev_permutation.hpp @@ -131,6 +131,7 @@ inline InputIterator pp_floor(InputIterator first, /// \param last Iterator pointing to end of range /// \param queue Queue on which to execute /// +/// Space complexity: \Omega(1) template<class InputIterator> inline bool prev_permutation(InputIterator first, InputIterator last, diff --git a/boost/compute/algorithm/random_shuffle.hpp b/boost/compute/algorithm/random_shuffle.hpp index 7d2d46a133..8e020830a5 100644 --- a/boost/compute/algorithm/random_shuffle.hpp +++ b/boost/compute/algorithm/random_shuffle.hpp @@ -28,6 +28,8 @@ namespace compute { /// Randomly shuffles the elements in the range [\p first, \p last). 
/// +/// Space complexity: \Omega(2n) +/// /// \see scatter() template<class Iterator> inline void random_shuffle(Iterator first, diff --git a/boost/compute/algorithm/reduce.hpp b/boost/compute/algorithm/reduce.hpp index 19d070019f..e71d90fe24 100644 --- a/boost/compute/algorithm/reduce.hpp +++ b/boost/compute/algorithm/reduce.hpp @@ -153,6 +153,7 @@ block_reduce(InputIterator first, return result_vector; } +// Space complexity: O( ceil(n / 2 / 256) ) template<class InputIterator, class OutputIterator, class BinaryFunction> inline void generic_reduce(InputIterator first, InputIterator last, @@ -264,6 +265,9 @@ inline void dispatch_reduce(InputIterator first, /// efficient on parallel hardware. For more information, see the documentation /// on the \c accumulate() algorithm. /// +/// Space complexity on GPUs: \Omega(n)<br> +/// Space complexity on CPUs: \Omega(1) +/// /// \see accumulate() template<class InputIterator, class OutputIterator, class BinaryFunction> inline void reduce(InputIterator first, diff --git a/boost/compute/algorithm/reduce_by_key.hpp b/boost/compute/algorithm/reduce_by_key.hpp index 87c73e887f..1a233c7dd4 100644 --- a/boost/compute/algorithm/reduce_by_key.hpp +++ b/boost/compute/algorithm/reduce_by_key.hpp @@ -51,6 +51,9 @@ namespace compute { /// /// \snippet test/test_reduce_by_key.cpp reduce_by_key_int /// +/// Space complexity on GPUs: \Omega(2n)<br> +/// Space complexity on CPUs: \Omega(1) +/// /// \see reduce() template<class InputKeyIterator, class InputValueIterator, class OutputKeyIterator, class OutputValueIterator, diff --git a/boost/compute/algorithm/remove.hpp b/boost/compute/algorithm/remove.hpp index 98feb1f9d8..086ba8cc7f 100644 --- a/boost/compute/algorithm/remove.hpp +++ b/boost/compute/algorithm/remove.hpp @@ -22,6 +22,8 @@ namespace compute { /// Removes each element equal to \p value in the range [\p first, /// \p last). 
/// +/// Space complexity: \Omega(3n) +/// /// \see remove_if() template<class Iterator, class T> inline Iterator remove(Iterator first, diff --git a/boost/compute/algorithm/remove_if.hpp b/boost/compute/algorithm/remove_if.hpp index 5e416bef88..9aece18bbd 100644 --- a/boost/compute/algorithm/remove_if.hpp +++ b/boost/compute/algorithm/remove_if.hpp @@ -22,6 +22,8 @@ namespace compute { /// Removes each element for which \p predicate returns \c true in the /// range [\p first, \p last). /// +/// Space complexity: \Omega(3n) +/// /// \see remove() template<class Iterator, class Predicate> inline Iterator remove_if(Iterator first, diff --git a/boost/compute/algorithm/replace.hpp b/boost/compute/algorithm/replace.hpp index fd649a2fad..336c1d3e0f 100644 --- a/boost/compute/algorithm/replace.hpp +++ b/boost/compute/algorithm/replace.hpp @@ -68,6 +68,8 @@ private: /// Replaces each instance of \p old_value in the range [\p first, /// \p last) with \p new_value. +/// +/// Space complexity: \Omega(1) template<class Iterator, class T> inline void replace(Iterator first, Iterator last, diff --git a/boost/compute/algorithm/replace_copy.hpp b/boost/compute/algorithm/replace_copy.hpp index 7224bd3ae6..34f61b514f 100644 --- a/boost/compute/algorithm/replace_copy.hpp +++ b/boost/compute/algorithm/replace_copy.hpp @@ -25,6 +25,8 @@ namespace compute { /// beginning at \p result while replacing each instance of \p old_value /// with \p new_value. /// +/// Space complexity: \Omega(1) +/// /// \see replace() template<class InputIterator, class OutputIterator, class T> inline OutputIterator diff --git a/boost/compute/algorithm/reverse.hpp b/boost/compute/algorithm/reverse.hpp index b6a9e8098c..15fe5533ac 100644 --- a/boost/compute/algorithm/reverse.hpp +++ b/boost/compute/algorithm/reverse.hpp @@ -52,6 +52,8 @@ struct reverse_kernel : public meta_kernel /// Reverses the elements in the range [\p first, \p last). 
/// +/// Space complexity: \Omega(1) +/// /// \see reverse_copy() template<class Iterator> inline void reverse(Iterator first, diff --git a/boost/compute/algorithm/reverse_copy.hpp b/boost/compute/algorithm/reverse_copy.hpp index c839f44651..9fda9d4e27 100644 --- a/boost/compute/algorithm/reverse_copy.hpp +++ b/boost/compute/algorithm/reverse_copy.hpp @@ -51,6 +51,8 @@ struct reverse_copy_kernel : public meta_kernel /// Copies the elements in the range [\p first, \p last) in reversed /// order to the range beginning at \p result. /// +/// Space complexity: \Omega(1) +/// /// \see reverse() template<class InputIterator, class OutputIterator> inline OutputIterator diff --git a/boost/compute/algorithm/rotate.hpp b/boost/compute/algorithm/rotate.hpp index 54cb073cc2..715699340d 100644 --- a/boost/compute/algorithm/rotate.hpp +++ b/boost/compute/algorithm/rotate.hpp @@ -21,6 +21,8 @@ namespace compute { /// Performs left rotation such that element at \p n_first comes to the /// beginning. /// +/// Space complexity: \Omega(distance(\p first, \p last)) +/// /// \see rotate_copy() template<class InputIterator> inline void rotate(InputIterator first, diff --git a/boost/compute/algorithm/rotate_copy.hpp b/boost/compute/algorithm/rotate_copy.hpp index fa1b44c5e5..679b3c998b 100644 --- a/boost/compute/algorithm/rotate_copy.hpp +++ b/boost/compute/algorithm/rotate_copy.hpp @@ -20,6 +20,8 @@ namespace compute { /// Performs left rotation such that element at n_first comes to the /// beginning and the output is stored in range starting at result. 
/// +/// Space complexity: \Omega(1) +/// /// \see rotate() template<class InputIterator, class OutputIterator> inline void rotate_copy(InputIterator first, diff --git a/boost/compute/algorithm/scatter.hpp b/boost/compute/algorithm/scatter.hpp index bea4201628..8ae5a99443 100644 --- a/boost/compute/algorithm/scatter.hpp +++ b/boost/compute/algorithm/scatter.hpp @@ -79,6 +79,8 @@ private: /// beginning at \p result using the output indices from the range beginning /// at \p map. /// +/// Space complexity: \Omega(1) +/// /// \see gather() template<class InputIterator, class MapIterator, class OutputIterator> inline void scatter(InputIterator first, diff --git a/boost/compute/algorithm/scatter_if.hpp b/boost/compute/algorithm/scatter_if.hpp index 159edd8c86..c7db51d3be 100644 --- a/boost/compute/algorithm/scatter_if.hpp +++ b/boost/compute/algorithm/scatter_if.hpp @@ -83,7 +83,7 @@ private: /// at \p map if stencil is resolved to true. By default the predicate is /// an identity /// -/// +/// Space complexity: \Omega(1) template<class InputIterator, class MapIterator, class StencilIterator, class OutputIterator, class Predicate> inline void scatter_if(InputIterator first, diff --git a/boost/compute/algorithm/search.hpp b/boost/compute/algorithm/search.hpp index 3d3d035b3c..a1f3dece62 100644 --- a/boost/compute/algorithm/search.hpp +++ b/boost/compute/algorithm/search.hpp @@ -34,6 +34,7 @@ namespace compute { /// \param p_last Iterator pointing to end of pattern /// \param queue Queue on which to execute /// +/// Space complexity: \Omega(distance(\p t_first, \p t_last)) template<class TextIterator, class PatternIterator> inline TextIterator search(TextIterator t_first, TextIterator t_last, diff --git a/boost/compute/algorithm/search_n.hpp b/boost/compute/algorithm/search_n.hpp index 9e03111bb0..86ff64dfd9 100644 --- a/boost/compute/algorithm/search_n.hpp +++ b/boost/compute/algorithm/search_n.hpp @@ -102,6 +102,7 @@ private: /// \param value Value which repeats /// 
\param queue Queue on which to execute /// +/// Space complexity: \Omega(distance(\p t_first, \p t_last)) template<class TextIterator, class ValueType> inline TextIterator search_n(TextIterator t_first, TextIterator t_last, diff --git a/boost/compute/algorithm/set_difference.hpp b/boost/compute/algorithm/set_difference.hpp index 17ce7bd3f6..85a846ba13 100644 --- a/boost/compute/algorithm/set_difference.hpp +++ b/boost/compute/algorithm/set_difference.hpp @@ -122,6 +122,8 @@ private: /// will be stored /// \param queue Queue on which to execute /// +/// Space complexity: +/// \Omega(2(distance(\p first1, \p last1) + distance(\p first2, \p last2))) template<class InputIterator1, class InputIterator2, class OutputIterator> inline OutputIterator set_difference(InputIterator1 first1, InputIterator1 last1, diff --git a/boost/compute/algorithm/set_intersection.hpp b/boost/compute/algorithm/set_intersection.hpp index 50f291e84a..74d46f57c6 100644 --- a/boost/compute/algorithm/set_intersection.hpp +++ b/boost/compute/algorithm/set_intersection.hpp @@ -110,6 +110,8 @@ private: /// will be stored /// \param queue Queue on which to execute /// +/// Space complexity: +/// \Omega(2(distance(\p first1, \p last1) + distance(\p first2, \p last2))) template<class InputIterator1, class InputIterator2, class OutputIterator> inline OutputIterator set_intersection(InputIterator1 first1, InputIterator1 last1, diff --git a/boost/compute/algorithm/set_symmetric_difference.hpp b/boost/compute/algorithm/set_symmetric_difference.hpp index 6e60b38511..34d280daa3 100644 --- a/boost/compute/algorithm/set_symmetric_difference.hpp +++ b/boost/compute/algorithm/set_symmetric_difference.hpp @@ -133,13 +133,16 @@ private: /// difference will be stored /// \param queue Queue on which to execute /// +/// Space complexity: +/// \Omega(2(distance(\p first1, \p last1) + distance(\p first2, \p last2))) template<class InputIterator1, class InputIterator2, class OutputIterator> -inline OutputIterator 
set_symmetric_difference(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - command_queue &queue = system::default_queue()) +inline OutputIterator +set_symmetric_difference(InputIterator1 first1, + InputIterator1 last1, + InputIterator2 first2, + InputIterator2 last2, + OutputIterator result, + command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits<InputIterator1>::value_type value_type; diff --git a/boost/compute/algorithm/set_union.hpp b/boost/compute/algorithm/set_union.hpp index c61f7b29b3..6b405a0905 100644 --- a/boost/compute/algorithm/set_union.hpp +++ b/boost/compute/algorithm/set_union.hpp @@ -135,6 +135,8 @@ private: /// will be stored /// \param queue Queue on which to execute /// +/// Space complexity: +/// \Omega(2(distance(\p first1, \p last1) + distance(\p first2, \p last2))) template<class InputIterator1, class InputIterator2, class OutputIterator> inline OutputIterator set_union(InputIterator1 first1, InputIterator1 last1, diff --git a/boost/compute/algorithm/sort.hpp b/boost/compute/algorithm/sort.hpp index 7e0a583e3e..b8fa90f335 100644 --- a/boost/compute/algorithm/sort.hpp +++ b/boost/compute/algorithm/sort.hpp @@ -176,6 +176,8 @@ inline void dispatch_sort(Iterator first, /// boost::compute::sort(data.begin(), data.end(), queue); /// \endcode /// +/// Space complexity: \Omega(n) +/// /// \see is_sorted() template<class Iterator, class Compare> inline void sort(Iterator first, diff --git a/boost/compute/algorithm/sort_by_key.hpp b/boost/compute/algorithm/sort_by_key.hpp index c39bcf9890..fdd2d1c481 100644 --- a/boost/compute/algorithm/sort_by_key.hpp +++ b/boost/compute/algorithm/sort_by_key.hpp @@ -128,6 +128,8 @@ inline void dispatch_sort_by_key(KeyIterator keys_first, /// /// If no compare function is specified, \c less is used. 
/// +/// Space complexity: \Omega(2n) +/// /// \see sort() template<class KeyIterator, class ValueIterator, class Compare> inline void sort_by_key(KeyIterator keys_first, diff --git a/boost/compute/algorithm/stable_partition.hpp b/boost/compute/algorithm/stable_partition.hpp index 283b068283..2b07f034b9 100644 --- a/boost/compute/algorithm/stable_partition.hpp +++ b/boost/compute/algorithm/stable_partition.hpp @@ -33,6 +33,8 @@ namespace compute { /// \param predicate Unary predicate to be applied on each element /// \param queue Queue on which to execute /// +/// Space complexity: \Omega(3n) +/// /// \see is_partitioned() and partition() /// template<class Iterator, class UnaryPredicate> diff --git a/boost/compute/algorithm/stable_sort.hpp b/boost/compute/algorithm/stable_sort.hpp index 381fc81bc0..0857d75dc9 100644 --- a/boost/compute/algorithm/stable_sort.hpp +++ b/boost/compute/algorithm/stable_sort.hpp @@ -72,6 +72,8 @@ dispatch_gpu_stable_sort(buffer_iterator<T> first, /// Sorts the values in the range [\p first, \p last) according to /// \p compare. The relative order of identical values is preserved. /// +/// Space complexity: \Omega(n) +/// /// \see sort(), is_sorted() template<class Iterator, class Compare> inline void stable_sort(Iterator first, diff --git a/boost/compute/algorithm/stable_sort_by_key.hpp b/boost/compute/algorithm/stable_sort_by_key.hpp index 878f999f44..ce8811ef19 100644 --- a/boost/compute/algorithm/stable_sort_by_key.hpp +++ b/boost/compute/algorithm/stable_sort_by_key.hpp @@ -126,6 +126,8 @@ inline void dispatch_ssort_by_key(KeyIterator keys_first, /// /// If no compare function is specified, \c less is used. 
/// +/// Space complexity: \Omega(2n) +/// /// \see sort() template<class KeyIterator, class ValueIterator, class Compare> inline void stable_sort_by_key(KeyIterator keys_first, diff --git a/boost/compute/algorithm/swap_ranges.hpp b/boost/compute/algorithm/swap_ranges.hpp index 6ff3e14f6a..a706df7a61 100644 --- a/boost/compute/algorithm/swap_ranges.hpp +++ b/boost/compute/algorithm/swap_ranges.hpp @@ -21,6 +21,8 @@ namespace compute { /// Swaps the elements in the range [\p first1, \p last1) with the /// elements in the range beginning at \p first2. +/// +/// Space complexity: \Omega(distance(\p first1, \p last1)) template<class Iterator1, class Iterator2> inline Iterator2 swap_ranges(Iterator1 first1, Iterator1 last1, diff --git a/boost/compute/algorithm/transform.hpp b/boost/compute/algorithm/transform.hpp index 68750a6523..9137604d55 100644 --- a/boost/compute/algorithm/transform.hpp +++ b/boost/compute/algorithm/transform.hpp @@ -29,6 +29,8 @@ namespace compute { /// /// \snippet test/test_transform.cpp transform_abs /// +/// Space complexity: \Omega(1) +/// /// \see copy() template<class InputIterator, class OutputIterator, class UnaryOperator> inline OutputIterator transform(InputIterator first, diff --git a/boost/compute/algorithm/transform_if.hpp b/boost/compute/algorithm/transform_if.hpp index 0eb0fd434e..9a98102d27 100644 --- a/boost/compute/algorithm/transform_if.hpp +++ b/boost/compute/algorithm/transform_if.hpp @@ -26,6 +26,7 @@ namespace boost { namespace compute { namespace detail { +// Space complexity: O(2n) template<class InputIterator, class OutputIterator, class UnaryFunction, class Predicate> inline OutputIterator transform_if_impl(InputIterator first, InputIterator last, @@ -53,14 +54,12 @@ inline OutputIterator transform_if_impl(InputIterator first, << predicate(first[k1.get_global_id(0)]) << " ? 
1 : 0;\n"; k1.exec_1d(queue, 0, count); - // count number of elements to be copied - size_t copied_element_count = - ::boost::compute::count(indices.begin(), indices.end(), 1, queue); - // scan indices + size_t copied_element_count = (indices.cend() - 1).read(queue); ::boost::compute::exclusive_scan( indices.begin(), indices.end(), indices.begin(), queue ); + copied_element_count += (indices.cend() - 1).read(queue); // last scan element plus last mask element // copy values ::boost::compute::detail::meta_kernel k2("transform_if_do_copy"); @@ -98,6 +97,8 @@ inline discard_iterator transform_if_impl(InputIterator first, /// Copies each element in the range [\p first, \p last) for which /// \p predicate returns \c true to the range beginning at \p result. +/// +/// Space complexity: O(2n) template<class InputIterator, class OutputIterator, class UnaryFunction, class Predicate> inline OutputIterator transform_if(InputIterator first, InputIterator last, diff --git a/boost/compute/algorithm/transform_reduce.hpp b/boost/compute/algorithm/transform_reduce.hpp index fbeee5a691..a59a76aefd 100644 --- a/boost/compute/algorithm/transform_reduce.hpp +++ b/boost/compute/algorithm/transform_reduce.hpp @@ -30,6 +30,9 @@ namespace compute { /// /// \snippet test/test_transform_reduce.cpp sum_abs_int /// +/// Space complexity on GPUs: \Omega(n)<br> +/// Space complexity on CPUs: \Omega(1) +/// /// \see reduce(), inner_product() template<class InputIterator, class OutputIterator, diff --git a/boost/compute/algorithm/unique.hpp b/boost/compute/algorithm/unique.hpp index faa36bad9d..8b7e2a0d0d 100644 --- a/boost/compute/algorithm/unique.hpp +++ b/boost/compute/algorithm/unique.hpp @@ -31,6 +31,8 @@ namespace compute { /// /// \return \c InputIterator to the new logical end of the range /// +/// Space complexity: \Omega(4n) +/// /// \see unique_copy() template<class InputIterator, class BinaryPredicate> inline InputIterator unique(InputIterator first, diff --git 
a/boost/compute/algorithm/unique_copy.hpp b/boost/compute/algorithm/unique_copy.hpp index 2ce60a9359..d5fffd4ff9 100644 --- a/boost/compute/algorithm/unique_copy.hpp +++ b/boost/compute/algorithm/unique_copy.hpp @@ -127,6 +127,8 @@ inline OutputIterator unique_copy(InputIterator first, /// /// \return \c OutputIterator to the end of the result range /// +/// Space complexity: \Omega(4n) +/// /// \see unique() template<class InputIterator, class OutputIterator, class BinaryPredicate> inline OutputIterator unique_copy(InputIterator first, diff --git a/boost/compute/algorithm/upper_bound.hpp b/boost/compute/algorithm/upper_bound.hpp index a5a82d301c..f592c79b9a 100644 --- a/boost/compute/algorithm/upper_bound.hpp +++ b/boost/compute/algorithm/upper_bound.hpp @@ -22,6 +22,8 @@ namespace compute { /// Returns an iterator pointing to the first element in the sorted /// range [\p first, \p last) that is not less than or equal to /// \p value. +/// +/// Space complexity: \Omega(1) template<class InputIterator, class T> inline InputIterator upper_bound(InputIterator first, diff --git a/boost/compute/buffer.hpp b/boost/compute/buffer.hpp index b5a48806d5..128403cd62 100644 --- a/boost/compute/buffer.hpp +++ b/boost/compute/buffer.hpp @@ -154,7 +154,7 @@ public: /// \p queue to perform the copy. buffer clone(command_queue &queue) const; - #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + #if defined(BOOST_COMPUTE_CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Creates a new buffer out of this buffer. /// The new buffer is a sub region of this buffer. 
/// \p flags The mem_flags which should be used to create the new buffer @@ -187,7 +187,7 @@ public: return buffer(mem, false); } - #endif // CL_VERSION_1_1 + #endif // BOOST_COMPUTE_CL_VERSION_1_1 }; /// \internal_ define get_info() specializations for buffer @@ -201,12 +201,12 @@ BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(buffer, ((cl_context, CL_MEM_CONTEXT)) ) -#ifdef CL_VERSION_1_1 +#ifdef BOOST_COMPUTE_CL_VERSION_1_1 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(buffer, ((cl_mem, CL_MEM_ASSOCIATED_MEMOBJECT)) ((size_t, CL_MEM_OFFSET)) ) -#endif // CL_VERSION_1_1 +#endif // BOOST_COMPUTE_CL_VERSION_1_1 namespace detail { diff --git a/boost/compute/cl.hpp b/boost/compute/cl.hpp index c439d8dfdc..fe25ffde56 100644 --- a/boost/compute/cl.hpp +++ b/boost/compute/cl.hpp @@ -11,10 +11,58 @@ #ifndef BOOST_COMPUTE_CL_HPP #define BOOST_COMPUTE_CL_HPP +#if defined(BOOST_COMPUTE_MAX_CL_VERSION) +# if !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS) && BOOST_COMPUTE_MAX_CL_VERSION < 202 +# define CL_USE_DEPRECATED_OPENCL_2_1_APIS +# endif +# if !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS) && BOOST_COMPUTE_MAX_CL_VERSION < 201 +# define CL_USE_DEPRECATED_OPENCL_2_0_APIS +# endif +# if !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) && BOOST_COMPUTE_MAX_CL_VERSION < 200 +# define CL_USE_DEPRECATED_OPENCL_1_2_APIS +# endif +# if !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) && BOOST_COMPUTE_MAX_CL_VERSION < 102 +# define CL_USE_DEPRECATED_OPENCL_1_1_APIS +# endif +# if !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS) && BOOST_COMPUTE_MAX_CL_VERSION < 101 +# define CL_USE_DEPRECATED_OPENCL_1_0_APIS +# endif +#endif + #if defined(__APPLE__) #include <OpenCL/cl.h> #else #include <CL/cl.h> #endif +// select what OpenCL core API versions to use +#if defined(CL_VERSION_1_0) +# define BOOST_COMPUTE_CL_VERSION_1_0 +#endif +#if defined(CL_VERSION_1_1) +# if !defined(BOOST_COMPUTE_MAX_CL_VERSION) || BOOST_COMPUTE_MAX_CL_VERSION >= 101 +# define BOOST_COMPUTE_CL_VERSION_1_1 +# endif 
+#endif +#if defined(CL_VERSION_1_2) +# if !defined(BOOST_COMPUTE_MAX_CL_VERSION) || BOOST_COMPUTE_MAX_CL_VERSION >= 102 +# define BOOST_COMPUTE_CL_VERSION_1_2 +# endif +#endif +#if defined(CL_VERSION_2_0) +# if !defined(BOOST_COMPUTE_MAX_CL_VERSION) || BOOST_COMPUTE_MAX_CL_VERSION >= 200 +# define BOOST_COMPUTE_CL_VERSION_2_0 +# endif +#endif +#if defined(CL_VERSION_2_1) +# if !defined(BOOST_COMPUTE_MAX_CL_VERSION) || BOOST_COMPUTE_MAX_CL_VERSION >= 201 +# define BOOST_COMPUTE_CL_VERSION_2_1 +# endif +#endif +#if defined(CL_VERSION_2_2) +# if !defined(BOOST_COMPUTE_MAX_CL_VERSION) || BOOST_COMPUTE_MAX_CL_VERSION >= 202 +# define BOOST_COMPUTE_CL_VERSION_2_2 +# endif +#endif + #endif // BOOST_COMPUTE_CL_HPP diff --git a/boost/compute/command_queue.hpp b/boost/compute/command_queue.hpp index 2a1328a959..d9e81e0bd4 100644 --- a/boost/compute/command_queue.hpp +++ b/boost/compute/command_queue.hpp @@ -81,12 +81,17 @@ public: enum properties { enable_profiling = CL_QUEUE_PROFILING_ENABLE, enable_out_of_order_execution = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE + #ifdef BOOST_COMPUTE_CL_VERSION_2_0 + , + on_device = CL_QUEUE_ON_DEVICE, + on_device_default = CL_QUEUE_ON_DEVICE_DEFAULT + #endif }; enum map_flags { map_read = CL_MAP_READ, map_write = CL_MAP_WRITE - #ifdef CL_VERSION_1_2 + #ifdef BOOST_COMPUTE_CL_VERSION_1_2 , map_write_invalidate_region = CL_MAP_WRITE_INVALIDATE_REGION #endif @@ -118,7 +123,7 @@ public: cl_int error = 0; - #ifdef CL_VERSION_2_0 + #ifdef BOOST_COMPUTE_CL_VERSION_2_0 if (device.check_version(2, 0)){ std::vector<cl_queue_properties> queue_properties; if(properties){ @@ -323,7 +328,7 @@ public: return event_; } - #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + #if defined(BOOST_COMPUTE_CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to read a rectangular region from \p buffer to /// host memory. 
/// @@ -417,7 +422,7 @@ public: return event_; } - #endif // CL_VERSION_1_1 + #endif // BOOST_COMPUTE_CL_VERSION_1_1 /// Enqueues a command to write data from host memory to \p buffer. /// @@ -494,7 +499,7 @@ public: return event_; } - #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + #if defined(BOOST_COMPUTE_CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to write a rectangular region from host memory /// to \p buffer. /// @@ -588,7 +593,7 @@ public: return event_; } - #endif // CL_VERSION_1_1 + #endif // BOOST_COMPUTE_CL_VERSION_1_1 /// Enqueues a command to copy data from \p src_buffer to /// \p dst_buffer. @@ -630,7 +635,7 @@ public: return event_; } - #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + #if defined(BOOST_COMPUTE_CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to copy a rectangular region from /// \p src_buffer to \p dst_buffer. /// @@ -676,9 +681,9 @@ public: return event_; } - #endif // CL_VERSION_1_1 + #endif // BOOST_COMPUTE_CL_VERSION_1_1 - #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to fill \p buffer with \p pattern. /// /// \see_opencl_ref{clEnqueueFillBuffer} @@ -717,7 +722,7 @@ public: return event_; } - #endif // CL_VERSION_1_2 + #endif // BOOST_COMPUTE_CL_VERSION_1_2 /// Enqueues a command to map \p buffer into the host address space. /// Event associated with map operation is returned through @@ -1269,7 +1274,7 @@ public: return event_; } - #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to fill \p image with \p fill_color. 
/// /// \see_opencl_ref{clEnqueueFillImage} @@ -1354,7 +1359,7 @@ public: return event_; } - #endif // CL_VERSION_1_2 + #endif // BOOST_COMPUTE_CL_VERSION_1_2 /// Enqueues a kernel for execution. /// @@ -1438,7 +1443,7 @@ public: // clEnqueueTask() was deprecated in OpenCL 2.0. In that case we // just forward to the equivalent clEnqueueNDRangeKernel() call. - #ifdef CL_VERSION_2_0 + #ifdef BOOST_COMPUTE_CL_VERSION_2_0 size_t one = 1; cl_int ret = clEnqueueNDRangeKernel( m_queue, kernel, 1, 0, &one, &one, @@ -1511,7 +1516,10 @@ public: { BOOST_ASSERT(m_queue != 0); - clFlush(m_queue); + cl_int ret = clFlush(m_queue); + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } } /// Blocks until all outstanding commands in the queue have finished. @@ -1521,7 +1529,10 @@ public: { BOOST_ASSERT(m_queue != 0); - clFinish(m_queue); + cl_int ret = clFinish(m_queue); + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } } /// Enqueues a barrier in the queue. @@ -1530,11 +1541,11 @@ public: BOOST_ASSERT(m_queue != 0); cl_int ret = CL_SUCCESS; - #ifdef CL_VERSION_1_2 + #ifdef BOOST_COMPUTE_CL_VERSION_1_2 if(get_device().check_version(1, 2)){ ret = clEnqueueBarrierWithWaitList(m_queue, 0, 0, 0); } else - #endif // CL_VERSION_1_2 + #endif // BOOST_COMPUTE_CL_VERSION_1_2 { // Suppress deprecated declarations warning BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS(); @@ -1547,7 +1558,7 @@ public: } } - #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a barrier in the queue after \p events. /// /// \opencl_version_warning{1,2} @@ -1568,7 +1579,7 @@ public: return event_; } - #endif // CL_VERSION_1_2 + #endif // BOOST_COMPUTE_CL_VERSION_1_2 /// Enqueues a marker in the queue and returns an event that can be /// used to track its progress. 
@@ -1577,7 +1588,7 @@ public: event event_; cl_int ret = CL_SUCCESS; - #ifdef CL_VERSION_1_2 + #ifdef BOOST_COMPUTE_CL_VERSION_1_2 if(get_device().check_version(1, 2)){ ret = clEnqueueMarkerWithWaitList(m_queue, 0, 0, &event_.get()); } else @@ -1596,7 +1607,7 @@ public: return event_; } - #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a marker after \p events in the queue and returns an /// event that can be used to track its progress. /// @@ -1615,9 +1626,9 @@ public: return event_; } - #endif // CL_VERSION_1_2 + #endif // BOOST_COMPUTE_CL_VERSION_1_2 - #if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to copy \p size bytes of data from \p src_ptr to /// \p dst_ptr. /// @@ -1797,7 +1808,7 @@ public: return event_; } - #endif // CL_VERSION_2_0 + #endif // BOOST_COMPUTE_CL_VERSION_2_0 /// Returns \c true if the command queue is the same at \p other. 
bool operator==(const command_queue &other) const diff --git a/boost/compute/container/detail/scalar.hpp b/boost/compute/container/detail/scalar.hpp index 7ecd86e540..4dee76d2b4 100644 --- a/boost/compute/container/detail/scalar.hpp +++ b/boost/compute/container/detail/scalar.hpp @@ -12,6 +12,7 @@ #define BOOST_COMPUTE_CONTAINER_DETAIL_SCALAR_HPP #include <boost/compute/buffer.hpp> +#include <boost/compute/event.hpp> #include <boost/compute/detail/read_write_single_value.hpp> namespace boost { @@ -40,9 +41,9 @@ public: return read_single_value<T>(m_buffer, 0, queue); } - void write(const T &value, command_queue &queue) + event write(const T &value, command_queue &queue) { - write_single_value<T>(value, m_buffer, 0, queue); + return write_single_value<T>(value, m_buffer, 0, queue); } const buffer& get_buffer() const diff --git a/boost/compute/container/valarray.hpp b/boost/compute/container/valarray.hpp index 8ac8e01753..981ec2e545 100644 --- a/boost/compute/container/valarray.hpp +++ b/boost/compute/container/valarray.hpp @@ -67,6 +67,7 @@ public: valarray(const valarray<T> &other) : m_buffer(other.m_buffer.get_context(), other.size() * sizeof(T)) { + copy(other.begin(), other.end(), begin()); } valarray(const std::valarray<T> &valarray, diff --git a/boost/compute/context.hpp b/boost/compute/context.hpp index 61e84e9767..13154a968a 100644 --- a/boost/compute/context.hpp +++ b/boost/compute/context.hpp @@ -233,11 +233,11 @@ BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(context, ((std::vector<cl_context_properties>, CL_CONTEXT_PROPERTIES)) ) -#ifdef CL_VERSION_1_1 +#ifdef BOOST_COMPUTE_CL_VERSION_1_1 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(context, ((cl_uint, CL_CONTEXT_NUM_DEVICES)) ) -#endif // CL_VERSION_1_1 +#endif // BOOST_COMPUTE_CL_VERSION_1_1 } // end compute namespace } // end boost namespace diff --git a/boost/compute/detail/buffer_value.hpp b/boost/compute/detail/buffer_value.hpp index 6a4e78fc19..478fc03252 100644 --- 
a/boost/compute/detail/buffer_value.hpp +++ b/boost/compute/detail/buffer_value.hpp @@ -124,7 +124,9 @@ public: const context &context = m_buffer.get_context(); command_queue queue(context, context.get_device()); - detail::write_single_value<T>(value, m_buffer, m_index / sizeof(T), queue); + detail::write_single_value<T>( + value, m_buffer, m_index / sizeof(T), queue + ).wait(); return *this; } diff --git a/boost/compute/detail/duration.hpp b/boost/compute/detail/duration.hpp index 601f12d291..98e825fb3c 100644 --- a/boost/compute/detail/duration.hpp +++ b/boost/compute/detail/duration.hpp @@ -17,7 +17,9 @@ #include <chrono> #endif +#ifndef BOOST_COMPUTE_NO_BOOST_CHRONO #include <boost/chrono/duration.hpp> +#endif namespace boost { namespace compute { @@ -34,6 +36,7 @@ make_duration_from_nanoseconds(std::chrono::duration<Rep, Period>, size_t nanose } #endif // BOOST_COMPUTE_NO_HDR_CHRONO +#ifndef BOOST_COMPUTE_NO_BOOST_CHRONO template<class Rep, class Period> inline boost::chrono::duration<Rep, Period> make_duration_from_nanoseconds(boost::chrono::duration<Rep, Period>, size_t nanoseconds) @@ -42,6 +45,7 @@ make_duration_from_nanoseconds(boost::chrono::duration<Rep, Period>, size_t nano boost::chrono::nanoseconds(nanoseconds) ); } +#endif // BOOST_COMPUTE_NO_BOOST_CHRONO } // end detail namespace } // end compute namespace diff --git a/boost/compute/detail/meta_kernel.hpp b/boost/compute/detail/meta_kernel.hpp index 5e6d6e0337..13af7cc437 100644 --- a/boost/compute/detail/meta_kernel.hpp +++ b/boost/compute/detail/meta_kernel.hpp @@ -1036,7 +1036,7 @@ inline meta_kernel& operator<<(meta_kernel &kernel, } // SVM requires OpenCL 2.0 -#if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) +#if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) template<class T, class IndexExpr> inline meta_kernel& operator<<(meta_kernel &kernel, const svm_ptr_index_expr<T, IndexExpr> &expr) @@ -1072,7 +1072,7 @@ inline meta_kernel& 
operator<<(meta_kernel &kernel, BOOST_STATIC_ASSERT(N < 16); if(N < 10){ - return kernel << expr.m_arg << ".s" << uint_(N); + return kernel << expr.m_arg << ".s" << int_(N); } else if(N < 16){ #ifdef _MSC_VER diff --git a/boost/compute/detail/parameter_cache.hpp b/boost/compute/detail/parameter_cache.hpp index 0a16cd9b0e..c609490c1e 100644 --- a/boost/compute/detail/parameter_cache.hpp +++ b/boost/compute/detail/parameter_cache.hpp @@ -24,6 +24,7 @@ #include <boost/compute/version.hpp> #ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE +#include <cstdio> #include <boost/algorithm/string/trim.hpp> #include <boost/compute/detail/path.hpp> #include <boost/property_tree/ptree.hpp> @@ -117,9 +118,16 @@ private: static std::string version_string() { char buf[32]; - std::snprintf(buf, sizeof(buf), "%d.%d.%d", BOOST_COMPUTE_VERSION_MAJOR, - BOOST_COMPUTE_VERSION_MINOR, - BOOST_COMPUTE_VERSION_PATCH); + // snprintf is in Visual Studio since Visual Studio 2015 (_MSC_VER == 1900) + #if defined (_MSC_VER) && _MSC_VER < 1900 + #define DETAIL_SNPRINTF sprintf_s + #else + #define DETAIL_SNPRINTF std::snprintf + #endif + DETAIL_SNPRINTF(buf, sizeof(buf), "%d.%d.%d", BOOST_COMPUTE_VERSION_MAJOR, + BOOST_COMPUTE_VERSION_MINOR, + BOOST_COMPUTE_VERSION_PATCH); + #undef DETAIL_SNPRINTF return buf; } diff --git a/boost/compute/detail/path.hpp b/boost/compute/detail/path.hpp index ec8760eaf9..d9c5afd182 100644 --- a/boost/compute/detail/path.hpp +++ b/boost/compute/detail/path.hpp @@ -30,7 +30,7 @@ static const std::string& path_delim() // Path to appdata folder. 
inline const std::string& appdata_path() { - #ifdef WIN32 + #ifdef _WIN32 static const std::string appdata = detail::getenv("APPDATA") + path_delim() + "boost_compute"; #else diff --git a/boost/compute/detail/read_write_single_value.hpp b/boost/compute/detail/read_write_single_value.hpp index fde40d946c..3e613bc8c3 100644 --- a/boost/compute/detail/read_write_single_value.hpp +++ b/boost/compute/detail/read_write_single_value.hpp @@ -14,6 +14,7 @@ #include <boost/throw_exception.hpp> #include <boost/compute/buffer.hpp> +#include <boost/compute/event.hpp> #include <boost/compute/exception.hpp> #include <boost/compute/command_queue.hpp> @@ -47,18 +48,18 @@ inline T read_single_value(const buffer &buffer, command_queue &queue) // writes a single value at index to the buffer template<class T> -inline void write_single_value(const T &value, - const buffer &buffer, - size_t index, - command_queue &queue) +inline event write_single_value(const T &value, + const buffer &buffer, + size_t index, + command_queue &queue) { BOOST_ASSERT(index < buffer.size() / sizeof(T)); BOOST_ASSERT(buffer.get_context() == queue.get_context()); - queue.enqueue_write_buffer(buffer, - index * sizeof(T), - sizeof(T), - &value); + return queue.enqueue_write_buffer(buffer, + index * sizeof(T), + sizeof(T), + &value); } // writes value to the first location in buffer diff --git a/boost/compute/device.hpp b/boost/compute/device.hpp index 5cf2e8c931..427d75d60b 100644 --- a/boost/compute/device.hpp +++ b/boost/compute/device.hpp @@ -62,7 +62,7 @@ public: explicit device(cl_device_id id, bool retain = true) : m_id(id) { - #ifdef CL_VERSION_1_2 + #ifdef BOOST_COMPUTE_CL_VERSION_1_2 if(m_id && retain && is_subdevice()){ clRetainDevice(m_id); } @@ -75,7 +75,7 @@ public: device(const device &other) : m_id(other.m_id) { - #ifdef CL_VERSION_1_2 + #ifdef BOOST_COMPUTE_CL_VERSION_1_2 if(m_id && is_subdevice()){ clRetainDevice(m_id); } @@ -86,7 +86,7 @@ public: device& operator=(const device &other) { if(this 
!= &other){ - #ifdef CL_VERSION_1_2 + #ifdef BOOST_COMPUTE_CL_VERSION_1_2 if(m_id && is_subdevice()){ clReleaseDevice(m_id); } @@ -94,7 +94,7 @@ public: m_id = other.m_id; - #ifdef CL_VERSION_1_2 + #ifdef BOOST_COMPUTE_CL_VERSION_1_2 if(m_id && is_subdevice()){ clRetainDevice(m_id); } @@ -115,7 +115,7 @@ public: /// Move-assigns the device from \p other to \c *this. device& operator=(device&& other) BOOST_NOEXCEPT { - #ifdef CL_VERSION_1_2 + #ifdef BOOST_COMPUTE_CL_VERSION_1_2 if(m_id && is_subdevice()){ clReleaseDevice(m_id); } @@ -131,7 +131,7 @@ public: /// Destroys the device object. ~device() { - #ifdef CL_VERSION_1_2 + #ifdef BOOST_COMPUTE_CL_VERSION_1_2 if(m_id && is_subdevice()){ BOOST_COMPUTE_ASSERT_CL_SUCCESS( clReleaseDevice(m_id) @@ -282,7 +282,7 @@ public: /// Returns \c true if the device is a sub-device. bool is_subdevice() const { - #if defined(CL_VERSION_1_2) + #if defined(BOOST_COMPUTE_CL_VERSION_1_2) try { return get_info<cl_device_id>(CL_DEVICE_PARENT_DEVICE) != 0; } @@ -321,7 +321,7 @@ public: typename detail::get_object_info_type<device, Enum>::type get_info() const; - #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Partitions the device into multiple sub-devices according to /// \p properties. /// @@ -393,7 +393,7 @@ public: return partition(properties); } - #endif // CL_VERSION_1_2 + #endif // BOOST_COMPUTE_CL_VERSION_1_2 /// Returns \c true if the device is the same at \p other. 
bool operator==(const device &other) const @@ -528,7 +528,7 @@ BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device, ) #endif -#ifdef CL_VERSION_1_1 +#ifdef BOOST_COMPUTE_CL_VERSION_1_1 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device, ((bool, CL_DEVICE_HOST_UNIFIED_MEMORY)) ((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR)) @@ -539,9 +539,9 @@ BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device, ((cl_uint, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE)) ((std::string, CL_DEVICE_OPENCL_C_VERSION)) ) -#endif // CL_VERSION_1_1 +#endif // BOOST_COMPUTE_CL_VERSION_1_1 -#ifdef CL_VERSION_1_2 +#ifdef BOOST_COMPUTE_CL_VERSION_1_2 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device, ((std::string, CL_DEVICE_BUILT_IN_KERNELS)) ((bool, CL_DEVICE_LINKER_AVAILABLE)) @@ -554,9 +554,9 @@ BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device, ((bool, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC)) ((cl_uint, CL_DEVICE_REFERENCE_COUNT)) ) -#endif // CL_VERSION_1_2 +#endif // BOOST_COMPUTE_CL_VERSION_1_2 -#ifdef CL_VERSION_2_0 +#ifdef BOOST_COMPUTE_CL_VERSION_2_0 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device, ((size_t, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE)) ((size_t, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE)) @@ -576,7 +576,7 @@ BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(device, ((cl_uint, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT)) ((cl_uint, CL_DEVICE_IMAGE_PITCH_ALIGNMENT)) ) -#endif // CL_VERSION_2_0 +#endif // BOOST_COMPUTE_CL_VERSION_2_0 } // end compute namespace } // end boost namespace diff --git a/boost/compute/event.hpp b/boost/compute/event.hpp index 2f53d87650..030c1bb3ba 100644 --- a/boost/compute/event.hpp +++ b/boost/compute/event.hpp @@ -74,7 +74,7 @@ public: marker = CL_COMMAND_MARKER, aquire_gl_objects = CL_COMMAND_ACQUIRE_GL_OBJECTS, release_gl_object = CL_COMMAND_RELEASE_GL_OBJECTS - #if defined(CL_VERSION_1_1) + #if defined(BOOST_COMPUTE_CL_VERSION_1_1) , read_buffer_rect = CL_COMMAND_READ_BUFFER_RECT, 
write_buffer_rect = CL_COMMAND_WRITE_BUFFER_RECT, @@ -218,7 +218,7 @@ public: } } - #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + #if defined(BOOST_COMPUTE_CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Registers a function to be called when the event status changes to /// \p status (by default CL_COMPLETE). The callback is passed the OpenCL /// event object, the event status, and a pointer to arbitrary user data. @@ -254,7 +254,7 @@ public: new boost::function<void()>(callback) ); } - #endif // CL_VERSION_1_1 + #endif // BOOST_COMPUTE_CL_VERSION_1_1 /// Returns the total duration of the event from \p start to \p end. /// @@ -300,7 +300,7 @@ public: } private: - #ifdef CL_VERSION_1_1 + #ifdef BOOST_COMPUTE_CL_VERSION_1_1 /// \internal_ static void BOOST_COMPUTE_CL_CALLBACK event_callback_invoker(cl_event, cl_int, void *user_data) @@ -312,7 +312,7 @@ private: delete callback; } - #endif // CL_VERSION_1_1 + #endif // BOOST_COMPUTE_CL_VERSION_1_1 protected: cl_event m_event; @@ -326,7 +326,7 @@ BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(event, ((cl_uint, CL_EVENT_REFERENCE_COUNT)) ) -#ifdef CL_VERSION_1_1 +#ifdef BOOST_COMPUTE_CL_VERSION_1_1 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(event, ((cl_context, CL_EVENT_CONTEXT)) ) diff --git a/boost/compute/exception/opencl_error.hpp b/boost/compute/exception/opencl_error.hpp index 29a3a9d258..0f47b56f84 100644 --- a/boost/compute/exception/opencl_error.hpp +++ b/boost/compute/exception/opencl_error.hpp @@ -123,7 +123,7 @@ public: case CL_INVALID_BUFFER_SIZE: return "Invalid Buffer Size"; case CL_INVALID_MIP_LEVEL: return "Invalid MIP Level"; case CL_INVALID_GLOBAL_WORK_SIZE: return "Invalid Global Work Size"; - #ifdef CL_VERSION_1_2 + #ifdef BOOST_COMPUTE_CL_VERSION_1_2 case CL_COMPILE_PROGRAM_FAILURE: return "Compile Program Failure"; case CL_LINKER_NOT_AVAILABLE: return "Linker Not Available"; case CL_LINK_PROGRAM_FAILURE: return "Link Program Failure"; @@ 
-134,8 +134,8 @@ public: case CL_INVALID_COMPILER_OPTIONS: return "Invalid Compiler Options"; case CL_INVALID_LINKER_OPTIONS: return "Invalid Linker Options"; case CL_INVALID_DEVICE_PARTITION_COUNT: return "Invalid Device Partition Count"; - #endif // CL_VERSION_1_2 - #ifdef CL_VERSION_2_0 + #endif // BOOST_COMPUTE_CL_VERSION_1_2 + #ifdef BOOST_COMPUTE_CL_VERSION_2_0 case CL_INVALID_PIPE_SIZE: return "Invalid Pipe Size"; case CL_INVALID_DEVICE_QUEUE: return "Invalid Device Queue"; #endif diff --git a/boost/compute/function.hpp b/boost/compute/function.hpp index b0b893e948..a731e18751 100644 --- a/boost/compute/function.hpp +++ b/boost/compute/function.hpp @@ -164,6 +164,19 @@ public: m_definitions[name] = value; } + bool operator==(const function<Signature>& other) const + { + return + (m_name == other.m_name) + && (m_definitions == other.m_definitions) + && (m_source == other.m_source); + } + + bool operator!=(const function<Signature>& other) const + { + return !(*this == other); + } + /// \internal_ detail::invoked_function<result_type, boost::tuple<> > operator()() const diff --git a/boost/compute/functional/atomic.hpp b/boost/compute/functional/atomic.hpp index 2701561bc3..ac4ac70d7f 100644 --- a/boost/compute/functional/atomic.hpp +++ b/boost/compute/functional/atomic.hpp @@ -15,7 +15,7 @@ #include <boost/compute/function.hpp> #ifndef BOOST_COMPUTE_DOXYGEN_INVOKED -#ifdef CL_VERSION_1_1 +#ifdef BOOST_COMPUTE_CL_VERSION_1_1 #define BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "atomic_" #else #define BOOST_COMPUTE_DETAIL_ATOMIC_PREFIX "atom_" diff --git a/boost/compute/image/image1d.hpp b/boost/compute/image/image1d.hpp index 2d71934ab4..0f146be089 100644 --- a/boost/compute/image/image1d.hpp +++ b/boost/compute/image/image1d.hpp @@ -50,7 +50,7 @@ public: cl_mem_flags flags = read_write, void *host_ptr = 0) { - #ifdef CL_VERSION_1_2 + #ifdef BOOST_COMPUTE_CL_VERSION_1_2 cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE1D; desc.image_width = image_width; @@ -61,7 
+61,7 @@ public: desc.image_slice_pitch = 0; desc.num_mip_levels = 0; desc.num_samples = 0; - #ifdef CL_VERSION_2_0 + #ifdef BOOST_COMPUTE_CL_VERSION_2_0 desc.mem_object = 0; #else desc.buffer = 0; @@ -151,7 +151,7 @@ public: static std::vector<image_format> get_supported_formats(const context &context, cl_mem_flags flags = read_write) { - #ifdef CL_VERSION_1_2 + #ifdef BOOST_COMPUTE_CL_VERSION_1_2 return image_object::get_supported_formats(context, CL_MEM_OBJECT_IMAGE1D, flags); #else return std::vector<image_format>(); @@ -164,7 +164,7 @@ public: const context &context, cl_mem_flags flags = read_write) { - #ifdef CL_VERSION_1_2 + #ifdef BOOST_COMPUTE_CL_VERSION_1_2 return image_object::is_supported_format( format, context, CL_MEM_OBJECT_IMAGE1D, flags ); diff --git a/boost/compute/image/image2d.hpp b/boost/compute/image/image2d.hpp index c203a9417f..edfe98de5a 100644 --- a/boost/compute/image/image2d.hpp +++ b/boost/compute/image/image2d.hpp @@ -58,7 +58,7 @@ public: { cl_int error = 0; - #ifdef CL_VERSION_1_2 + #ifdef BOOST_COMPUTE_CL_VERSION_1_2 cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE2D; desc.image_width = image_width; @@ -69,7 +69,7 @@ public: desc.image_slice_pitch = 0; desc.num_mip_levels = 0; desc.num_samples = 0; - #ifdef CL_VERSION_2_0 + #ifdef BOOST_COMPUTE_CL_VERSION_2_0 desc.mem_object = 0; #else desc.buffer = 0; @@ -108,7 +108,7 @@ public: { cl_int error = 0; - #ifdef CL_VERSION_1_2 + #ifdef BOOST_COMPUTE_CL_VERSION_1_2 cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE2D; desc.image_width = image_width; @@ -119,7 +119,7 @@ public: desc.image_slice_pitch = 0; desc.num_mip_levels = 0; desc.num_samples = 0; - #ifdef CL_VERSION_2_0 + #ifdef BOOST_COMPUTE_CL_VERSION_2_0 desc.mem_object = 0; #else desc.buffer = 0; diff --git a/boost/compute/image/image3d.hpp b/boost/compute/image/image3d.hpp index 9463cfaa16..5569f2a0b9 100644 --- a/boost/compute/image/image3d.hpp +++ b/boost/compute/image/image3d.hpp @@ -54,7 +54,7 @@ public: { 
cl_int error = 0; - #ifdef CL_VERSION_1_2 + #ifdef BOOST_COMPUTE_CL_VERSION_1_2 cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE3D; desc.image_width = image_width; @@ -65,7 +65,7 @@ public: desc.image_slice_pitch = image_slice_pitch; desc.num_mip_levels = 0; desc.num_samples = 0; - #ifdef CL_VERSION_2_0 + #ifdef BOOST_COMPUTE_CL_VERSION_2_0 desc.mem_object = 0; #else desc.buffer = 0; @@ -108,7 +108,7 @@ public: { cl_int error = 0; - #ifdef CL_VERSION_1_2 + #ifdef BOOST_COMPUTE_CL_VERSION_1_2 cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE3D; desc.image_width = image_width; @@ -119,7 +119,7 @@ public: desc.image_slice_pitch = image_slice_pitch; desc.num_mip_levels = 0; desc.num_samples = 0; - #ifdef CL_VERSION_2_0 + #ifdef BOOST_COMPUTE_CL_VERSION_2_0 desc.mem_object = 0; #else desc.buffer = 0; diff --git a/boost/compute/image/image_sampler.hpp b/boost/compute/image/image_sampler.hpp index 4f1bfe9b86..26f20aa9d5 100644 --- a/boost/compute/image/image_sampler.hpp +++ b/boost/compute/image/image_sampler.hpp @@ -55,7 +55,7 @@ public: { cl_int error = 0; - #ifdef CL_VERSION_2_0 + #ifdef BOOST_COMPUTE_CL_VERSION_2_0 std::vector<cl_sampler_properties> sampler_properties; sampler_properties.push_back(CL_SAMPLER_NORMALIZED_COORDS); sampler_properties.push_back(cl_sampler_properties(normalized_coords)); diff --git a/boost/compute/interop/opengl/context.hpp b/boost/compute/interop/opengl/context.hpp index c35fedddfa..077c86036b 100644 --- a/boost/compute/interop/opengl/context.hpp +++ b/boost/compute/interop/opengl/context.hpp @@ -75,6 +75,10 @@ inline context opengl_create_shared_context() for(size_t i = 0; i < platforms.size(); i++){ const platform &platform = platforms[i]; + // check whether this platform supports OpenCL/OpenGL sharing + if (!platform.supports_extension(cl_gl_sharing_extension)) + continue; + // load clGetGLContextInfoKHR() extension function GetGLContextInfoKHRFunction GetGLContextInfoKHR = reinterpret_cast<GetGLContextInfoKHRFunction>( 
@@ -92,7 +96,7 @@ inline context opengl_create_shared_context() #if defined(__linux__) CL_GL_CONTEXT_KHR, (cl_context_properties) glXGetCurrentContext(), CL_GLX_DISPLAY_KHR, (cl_context_properties) glXGetCurrentDisplay(), - #elif defined(WIN32) + #elif defined(_WIN32) CL_GL_CONTEXT_KHR, (cl_context_properties) wglGetCurrentContext(), CL_WGL_HDC_KHR, (cl_context_properties) wglGetCurrentDC(), #endif diff --git a/boost/compute/interop/opengl/opengl_texture.hpp b/boost/compute/interop/opengl/opengl_texture.hpp index c1f3f4f441..ae095d22a9 100644 --- a/boost/compute/interop/opengl/opengl_texture.hpp +++ b/boost/compute/interop/opengl/opengl_texture.hpp @@ -51,7 +51,7 @@ public: { cl_int error = 0; - #ifdef CL_VERSION_1_2 + #ifdef BOOST_COMPUTE_CL_VERSION_1_2 m_mem = clCreateFromGLTexture(context, flags, texture_target, diff --git a/boost/compute/iterator/buffer_iterator.hpp b/boost/compute/iterator/buffer_iterator.hpp index cd68058f64..de2395a552 100644 --- a/boost/compute/iterator/buffer_iterator.hpp +++ b/boost/compute/iterator/buffer_iterator.hpp @@ -58,13 +58,20 @@ struct buffer_iterator_index_expr size_t index, const memory_object::address_space address_space, const IndexExpr &expr) - : m_buffer(buffer), + : m_buffer(buffer.get(), false), m_index(index), m_address_space(address_space), m_expr(expr) { } + ~buffer_iterator_index_expr() + { + // set buffer to null so that its reference count will + // not be decremented when its destructor is called + m_buffer.get() = 0; + } + operator T() const { BOOST_STATIC_ASSERT_MSG(boost::is_integral<IndexExpr>::value, @@ -73,10 +80,10 @@ struct buffer_iterator_index_expr return buffer_value<T>(m_buffer, size_t(m_expr) * sizeof(T)); } - const buffer &m_buffer; - size_t m_index; - memory_object::address_space m_address_space; - IndexExpr m_expr; + const buffer m_buffer; + const size_t m_index; + const memory_object::address_space m_address_space; + const IndexExpr m_expr; }; template<class T, class IndexExpr> diff --git 
a/boost/compute/iterator/counting_iterator.hpp b/boost/compute/iterator/counting_iterator.hpp index 304c1e05cf..384486bcb3 100644 --- a/boost/compute/iterator/counting_iterator.hpp +++ b/boost/compute/iterator/counting_iterator.hpp @@ -47,14 +47,14 @@ struct counting_iterator_index_expr { typedef T result_type; - counting_iterator_index_expr(const T &init, const IndexExpr &expr) + counting_iterator_index_expr(const T init, const IndexExpr &expr) : m_init(init), m_expr(expr) { } - const T &m_init; - IndexExpr m_expr; + const T m_init; + const IndexExpr m_expr; }; template<class T, class IndexExpr> diff --git a/boost/compute/iterator/function_input_iterator.hpp b/boost/compute/iterator/function_input_iterator.hpp index bd89b6c0fc..bb8f0d5d09 100644 --- a/boost/compute/iterator/function_input_iterator.hpp +++ b/boost/compute/iterator/function_input_iterator.hpp @@ -53,7 +53,7 @@ struct function_input_iterator_expr { } - Function m_function; + const Function m_function; }; template<class Function> diff --git a/boost/compute/iterator/permutation_iterator.hpp b/boost/compute/iterator/permutation_iterator.hpp index 8a7f97a402..a0d2dbfbc5 100644 --- a/boost/compute/iterator/permutation_iterator.hpp +++ b/boost/compute/iterator/permutation_iterator.hpp @@ -60,9 +60,9 @@ struct permutation_iterator_access_expr { } - ElementIterator m_element_iter; - IndexIterator m_index_iter; - IndexExpr m_expr; + const ElementIterator m_element_iter; + const IndexIterator m_index_iter; + const IndexExpr m_expr; }; template<class ElementIterator, class IndexIterator, class IndexExpr> diff --git a/boost/compute/iterator/strided_iterator.hpp b/boost/compute/iterator/strided_iterator.hpp index 52e7f07bd8..eb342dc33d 100644 --- a/boost/compute/iterator/strided_iterator.hpp +++ b/boost/compute/iterator/strided_iterator.hpp @@ -56,8 +56,8 @@ struct stride_expr { } - IndexExpr m_index_expr; - Stride m_stride; + const IndexExpr m_index_expr; + const Stride m_stride; }; template<class IndexExpr, 
class Stride> @@ -90,9 +90,9 @@ struct strided_iterator_index_expr { } - Iterator m_input_iter; - const Stride& m_stride; - IndexExpr m_index_expr; + const Iterator m_input_iter; + const Stride m_stride; + const IndexExpr m_index_expr; }; template<class Iterator, class Stride, class IndexExpr> diff --git a/boost/compute/iterator/transform_iterator.hpp b/boost/compute/iterator/transform_iterator.hpp index c040922f9d..08ff6ee4d4 100644 --- a/boost/compute/iterator/transform_iterator.hpp +++ b/boost/compute/iterator/transform_iterator.hpp @@ -76,9 +76,9 @@ struct transform_iterator_index_expr { } - InputIterator m_input_iter; - UnaryFunction m_transform_expr; - IndexExpr m_index_expr; + const InputIterator m_input_iter; + const UnaryFunction m_transform_expr; + const IndexExpr m_index_expr; }; template<class InputIterator, class UnaryFunction, class IndexExpr> diff --git a/boost/compute/iterator/zip_iterator.hpp b/boost/compute/iterator/zip_iterator.hpp index 2860d73a93..a4af6a3034 100644 --- a/boost/compute/iterator/zip_iterator.hpp +++ b/boost/compute/iterator/zip_iterator.hpp @@ -92,8 +92,8 @@ struct zip_iterator_index_expr { } - IteratorTuple m_iterators; - IndexExpr m_index_expr; + const IteratorTuple m_iterators; + const IndexExpr m_index_expr; }; /// \internal_ diff --git a/boost/compute/kernel.hpp b/boost/compute/kernel.hpp index 72f21a0378..097cba692e 100644 --- a/boost/compute/kernel.hpp +++ b/boost/compute/kernel.hpp @@ -168,7 +168,7 @@ public: typename detail::get_object_info_type<kernel, Enum>::type get_info() const; - #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Returns information about the argument at \p index. 
/// /// For example, to get the name of the first argument: @@ -197,7 +197,7 @@ public: template<int Enum> typename detail::get_object_info_type<kernel, Enum>::type get_arg_info(size_t index) const; - #endif // CL_VERSION_1_2 + #endif // BOOST_COMPUTE_CL_VERSION_1_2 /// Returns work-group information for the kernel with \p device. /// @@ -266,7 +266,7 @@ public: /// \internal_ void set_arg_svm_ptr(size_t index, void* ptr) { - #ifdef CL_VERSION_2_0 + #ifdef BOOST_COMPUTE_CL_VERSION_2_0 cl_int ret = clSetKernelArgSVMPointer(m_kernel, static_cast<cl_uint>(index), ptr); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); @@ -289,7 +289,7 @@ public: } #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES - #if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Sets additional execution information for the kernel. /// /// \opencl_version_warning{2,0} @@ -302,7 +302,7 @@ public: BOOST_THROW_EXCEPTION(opencl_error(ret)); } } - #endif // CL_VERSION_2_0 + #endif // BOOST_COMPUTE_CL_VERSION_2_0 /// Returns \c true if the kernel is the same at \p other. 
bool operator==(const kernel &other) const @@ -365,14 +365,14 @@ BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel, ((cl_program, CL_KERNEL_PROGRAM)) ) -#ifdef CL_VERSION_1_2 +#ifdef BOOST_COMPUTE_CL_VERSION_1_2 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel, ((std::string, CL_KERNEL_ATTRIBUTES)) ) -#endif // CL_VERSION_1_2 +#endif // BOOST_COMPUTE_CL_VERSION_1_2 /// \internal_ define get_arg_info() specializations for kernel -#ifdef CL_VERSION_1_2 +#ifdef BOOST_COMPUTE_CL_VERSION_1_2 #define BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(result_type, value) \ namespace detail { \ template<> struct get_object_info_type<kernel, value> { typedef result_type type; }; \ @@ -386,7 +386,7 @@ BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(cl_kernel_arg_acc BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(std::string, CL_KERNEL_ARG_TYPE_NAME) BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(cl_kernel_arg_type_qualifier, CL_KERNEL_ARG_TYPE_QUALIFIER) BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(std::string, CL_KERNEL_ARG_NAME) -#endif // CL_VERSION_1_2 +#endif // BOOST_COMPUTE_CL_VERSION_1_2 namespace detail { diff --git a/boost/compute/lambda/context.hpp b/boost/compute/lambda/context.hpp index ed25b79475..a7248d4639 100644 --- a/boost/compute/lambda/context.hpp +++ b/boost/compute/lambda/context.hpp @@ -78,6 +78,41 @@ struct context : proto::callable_context<context<Args> > stream << stream.lit(x); } + void operator()(proto::tag::terminal, const uchar_ &x) + { + stream << "(uchar)(" << stream.lit(uint_(x)) << "u)"; + } + + void operator()(proto::tag::terminal, const char_ &x) + { + stream << "(char)(" << stream.lit(int_(x)) << ")"; + } + + void operator()(proto::tag::terminal, const ushort_ &x) + { + stream << "(ushort)(" << stream.lit(x) << "u)"; + } + + void operator()(proto::tag::terminal, const short_ &x) + { + stream << "(short)(" << stream.lit(x) << ")"; + } + + void 
operator()(proto::tag::terminal, const uint_ &x) + { + stream << "(" << stream.lit(x) << "u)"; + } + + void operator()(proto::tag::terminal, const ulong_ &x) + { + stream << "(" << stream.lit(x) << "ul)"; + } + + void operator()(proto::tag::terminal, const long_ &x) + { + stream << "(" << stream.lit(x) << "l)"; + } + // handle placeholders template<int I> void operator()(proto::tag::terminal, placeholder<I>) diff --git a/boost/compute/lambda/functional.hpp b/boost/compute/lambda/functional.hpp index dd7190e4d9..42e75e4991 100644 --- a/boost/compute/lambda/functional.hpp +++ b/boost/compute/lambda/functional.hpp @@ -22,6 +22,11 @@ #include <boost/compute/lambda/result_of.hpp> #include <boost/compute/lambda/placeholder.hpp> +#include <boost/compute/types/fundamental.hpp> +#include <boost/compute/type_traits/scalar_type.hpp> +#include <boost/compute/type_traits/vector_size.hpp> +#include <boost/compute/type_traits/make_vector_type.hpp> + namespace boost { namespace compute { namespace lambda { @@ -29,7 +34,8 @@ namespace lambda { namespace mpl = boost::mpl; namespace proto = boost::proto; -// wraps a unary boolean function +// wraps a unary boolean function whose result type is an int_ when the argument +// type is a scalar, and intN_ if the argument type is a vector of size N #define BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(name) \ namespace detail { \ struct BOOST_PP_CAT(name, _func) \ @@ -37,7 +43,12 @@ namespace proto = boost::proto; template<class Expr, class Args> \ struct lambda_result \ { \ - typedef int type; \ + typedef typename proto::result_of::child_c<Expr, 1>::type Arg; \ + typedef typename ::boost::compute::lambda::result_of<Arg, Args>::type result_type; \ + typedef typename ::boost::compute::make_vector_type< \ + ::boost::compute::int_, \ + ::boost::compute::vector_size<result_type>::value \ + >::type type; \ }; \ \ template<class Context, class Arg> \ @@ -60,7 +71,7 @@ namespace proto = boost::proto; ); \ } -// wraps a unary function who's 
return type is the same as the argument type +// wraps a unary function whose return type is the same as the argument type #define BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(name) \ namespace detail { \ struct BOOST_PP_CAT(name, _func) \ @@ -92,7 +103,79 @@ namespace proto = boost::proto; ); \ } -// wraps a binary function +// wraps a unary function whose result type is the scalar type of the first argument +#define BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_ST(name) \ + namespace detail { \ + struct BOOST_PP_CAT(name, _func) \ + { \ + template<class Expr, class Args> \ + struct lambda_result \ + { \ + typedef typename proto::result_of::child_c<Expr, 1>::type Arg; \ + typedef typename ::boost::compute::lambda::result_of<Arg, Args>::type result_type; \ + typedef typename ::boost::compute::scalar_type<result_type>::type type; \ + }; \ + \ + template<class Context, class Arg> \ + static void apply(Context &ctx, const Arg &arg) \ + { \ + ctx.stream << #name << "("; \ + proto::eval(arg, ctx); \ + ctx.stream << ")"; \ + } \ + }; \ + } \ + template<class Arg> \ + inline typename proto::result_of::make_expr< \ + proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg& \ + >::type const \ + name(const Arg &arg) \ + { \ + return proto::make_expr<proto::tag::function>( \ + BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg) \ + ); \ + } + +// wraps a binary boolean function whose result type is an int_ when the first +// argument type is a scalar, and intN_ if the first argument type is a vector +// of size N +#define BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(name) \ + namespace detail { \ + struct BOOST_PP_CAT(name, _func) \ + { \ + template<class Expr, class Args> \ + struct lambda_result \ + { \ + typedef typename proto::result_of::child_c<Expr, 1>::type Arg1; \ + typedef typename ::boost::compute::make_vector_type< \ + ::boost::compute::int_, \ + ::boost::compute::vector_size<Arg1>::value \ + >::type type; \ + }; \ + \ + template<class Context, class 
Arg1, class Arg2> \ + static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2) \ + { \ + ctx.stream << #name << "("; \ + proto::eval(arg1, ctx); \ + ctx.stream << ", "; \ + proto::eval(arg2, ctx); \ + ctx.stream << ")"; \ + } \ + }; \ + } \ + template<class Arg1, class Arg2> \ + inline typename proto::result_of::make_expr< \ + proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg1&, const Arg2& \ + >::type const \ + name(const Arg1 &arg1, const Arg2 &arg2) \ + { \ + return proto::make_expr<proto::tag::function>( \ + BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg1), ::boost::ref(arg2) \ + ); \ + } + +// wraps a binary function whose result type is the type of the first argument #define BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(name) \ namespace detail { \ struct BOOST_PP_CAT(name, _func) \ @@ -126,6 +209,40 @@ namespace proto = boost::proto; ); \ } +// wraps a binary function whose result type is the type of the second argument +#define BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_2(name) \ + namespace detail { \ + struct BOOST_PP_CAT(name, _func) \ + { \ + template<class Expr, class Args> \ + struct lambda_result \ + { \ + typedef typename proto::result_of::child_c<Expr, 2>::type Arg2; \ + typedef typename ::boost::compute::lambda::result_of<Arg2, Args>::type type; \ + }; \ + \ + template<class Context, class Arg1, class Arg2> \ + static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2) \ + { \ + ctx.stream << #name << "("; \ + proto::eval(arg1, ctx); \ + ctx.stream << ", "; \ + proto::eval(arg2, ctx); \ + ctx.stream << ")"; \ + } \ + }; \ + } \ + template<class Arg1, class Arg2> \ + inline typename proto::result_of::make_expr< \ + proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg1&, const Arg2& \ + >::type const \ + name(const Arg1 &arg1, const Arg2 &arg2) \ + { \ + return proto::make_expr<proto::tag::function>( \ + BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg1), ::boost::ref(arg2) \ + ); \ + } + // 
wraps a binary function whose result type is the scalar type of the first argument #define BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_ST(name) \ namespace detail { \ @@ -161,6 +278,41 @@ namespace proto = boost::proto; ); \ } +// wraps a binary function whose result type is the type of the first argument +// and the second argument is a pointer +#define BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_PTR(name) \ + namespace detail { \ + struct BOOST_PP_CAT(name, _func) \ + { \ + template<class Expr, class Args> \ + struct lambda_result \ + { \ + typedef typename proto::result_of::child_c<Expr, 1>::type Arg1; \ + typedef typename ::boost::compute::lambda::result_of<Arg1, Args>::type type; \ + }; \ + \ + template<class Context, class Arg1, class Arg2> \ + static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2) \ + { \ + ctx.stream << #name << "("; \ + proto::eval(arg1, ctx); \ + ctx.stream << ", &"; \ + proto::eval(arg2, ctx); \ + ctx.stream << ")"; \ + } \ + }; \ + } \ + template<class Arg1, class Arg2> \ + inline typename proto::result_of::make_expr< \ + proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg1&, const Arg2& \ + >::type const \ + name(const Arg1 &arg1, const Arg2 &arg2) \ + { \ + return proto::make_expr<proto::tag::function>( \ + BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg1), ::boost::ref(arg2) \ + ); \ + } + // wraps a ternary function #define BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(name) \ namespace detail { \ @@ -197,43 +349,246 @@ namespace proto = boost::proto; ); \ } +// wraps a ternary function whose result type is the type of the third argument +#define BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION_3(name) \ + namespace detail { \ + struct BOOST_PP_CAT(name, _func) \ + { \ + template<class Expr, class Args> \ + struct lambda_result \ + { \ + typedef typename proto::result_of::child_c<Expr, 3>::type Arg3; \ + typedef typename ::boost::compute::lambda::result_of<Arg3, Args>::type type; \ + }; \ + \ + template<class 
Context, class Arg1, class Arg2, class Arg3> \ + static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) \ + { \ + ctx.stream << #name << "("; \ + proto::eval(arg1, ctx); \ + ctx.stream << ", "; \ + proto::eval(arg2, ctx); \ + ctx.stream << ", "; \ + proto::eval(arg3, ctx); \ + ctx.stream << ")"; \ + } \ + }; \ + } \ + template<class Arg1, class Arg2, class Arg3> \ + inline typename proto::result_of::make_expr< \ + proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg1&, const Arg2&, const Arg3& \ + >::type const \ + name(const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) \ + { \ + return proto::make_expr<proto::tag::function>( \ + BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg1), ::boost::ref(arg2), ::boost::ref(arg3) \ + ); \ + } -BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(all) -BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(any) -BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isinf) -BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isnan) -BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isfinite) +// wraps a ternary function whose result type is the type of the first argument +// and the third argument of the function is a pointer +#define BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION_PTR(name) \ + namespace detail { \ + struct BOOST_PP_CAT(name, _func) \ + { \ + template<class Expr, class Args> \ + struct lambda_result \ + { \ + typedef typename proto::result_of::child_c<Expr, 3>::type Arg3; \ + typedef typename ::boost::compute::lambda::result_of<Arg3, Args>::type type; \ + }; \ + \ + template<class Context, class Arg1, class Arg2, class Arg3> \ + static void apply(Context &ctx, const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) \ + { \ + ctx.stream << #name << "("; \ + proto::eval(arg1, ctx); \ + ctx.stream << ", "; \ + proto::eval(arg2, ctx); \ + ctx.stream << ", &"; \ + proto::eval(arg3, ctx); \ + ctx.stream << ")"; \ + } \ + }; \ + } \ + template<class Arg1, class Arg2, class Arg3> \ + inline 
typename proto::result_of::make_expr< \ + proto::tag::function, BOOST_PP_CAT(detail::name, _func), const Arg1&, const Arg2&, const Arg3& \ + >::type const \ + name(const Arg1 &arg1, const Arg2 &arg2, const Arg3 &arg3) \ + { \ + return proto::make_expr<proto::tag::function>( \ + BOOST_PP_CAT(detail::name, _func)(), ::boost::ref(arg1), ::boost::ref(arg2), ::boost::ref(arg3) \ + ); \ + } +// Common Built-In Functions +BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(clamp) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(degrees) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(min) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(max) +BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(mix) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(radians) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sign) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_2(step) +BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION_3(smoothstep) + +// Geometric Built-In Functions +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(cross) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_ST(dot) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_ST(distance) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_ST(length) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(normalize) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_ST(fast_distance) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_ST(fast_length) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(fast_normalize) + +// Integer Built-In Functions BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(abs) -BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(cos) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(abs_diff) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(add_sat) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(hadd) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(rhadd) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(clz) +#ifdef BOOST_COMPUTE_CL_VERSION_2_0 +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(ctz) +#endif +// clamp() (since 1.1) already defined in common +BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(mad_hi) +BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(mad24) 
+BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(mad_sat) +// max() and min() functions are defined in common +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(mul_hi) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(mul24) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(rotate) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(sub_sat) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(upsample) +#ifdef BOOST_COMPUTE_CL_VERSION_1_2 +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(popcount) +#endif + +// Math Built-In Functions BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(acos) -BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sin) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(acosh) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(acospi) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(asin) -BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(tan) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(asinh) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(asinpi) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(atan) -BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sqrt) -BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(rsqrt) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(atan2) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(atanh) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(atanpi) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(atan2pi) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(cbrt) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(ceil) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(copysign) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(cos) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(cosh) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(cospi) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(erfc) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(erf) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(exp) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(exp2) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(exp10) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(expm1) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(fabs) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(fdim) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(floor) 
+BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(fma) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(fmax) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(fmin) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(fmod) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_PTR(fract) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_PTR(frexp) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(hypot) +BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(ilogb) // ilogb returns intN_ +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(ldexp) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(lgamma) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_PTR(lgamma_r) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(log) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(log2) BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(log10) -BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(round) -BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(length) - -BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(cross) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(log1p) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(logb) +BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(mad) +#ifdef BOOST_COMPUTE_CL_VERSION_1_1 +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(maxmag) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(minmag) +#endif +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_PTR(modf) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(nan) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(nextafter) BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(pow) BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(pown) BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(powr) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(remainder) +BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION_PTR(remquo) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(rint) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(rootn) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(round) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(rsqrt) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sin) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sincos) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sinh) 
+BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sinpi) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(sqrt) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(tan) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(tanh) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(tanpi) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(tgamma) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(trunc) -BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_ST(dot) -BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION_ST(distance) +// Native Math Built-In Functions +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_cos) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(native_divide) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_exp) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_exp2) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_exp10) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_log) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_log2) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_log10) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(native_powr) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_recip) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_rsqrt) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_sin) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_sqrt) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(native_tan) -BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(clamp) -BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(fma) -BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(mad) -BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(smoothstep) +// Half Math Built-In Functions +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_cos) +BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(half_divide) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_exp) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_exp2) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_exp10) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_log) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_log2) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_log10) 
+BOOST_COMPUTE_LAMBDA_WRAP_BINARY_FUNCTION(half_powr) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_recip) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_rsqrt) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_sin) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_sqrt) +BOOST_COMPUTE_LAMBDA_WRAP_UNARY_FUNCTION_T(half_tan) + +// Relational Built-In Functions +BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(isequal) +BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(isnotequal) +BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(isgreater) +BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(isgreaterequal) +BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(isless) +BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(islessequal) +BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(islessgreater) +BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isfinite) +BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isinf) +BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isnan) +BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(isnormal) +BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(isordered) +BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_BINARY_FUNCTION(isunordered) +BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(signbit) +BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(all) +BOOST_COMPUTE_LAMBDA_WRAP_BOOLEAN_UNARY_FUNCTION(any) +BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(bitselect) +BOOST_COMPUTE_LAMBDA_WRAP_TERNARY_FUNCTION(select) } // end lambda namespace } // end compute namespace diff --git a/boost/compute/memory/svm_ptr.hpp b/boost/compute/memory/svm_ptr.hpp index 0c9d88035c..c8753f5b34 100644 --- a/boost/compute/memory/svm_ptr.hpp +++ b/boost/compute/memory/svm_ptr.hpp @@ -29,7 +29,7 @@ template<class T> class svm_ptr; // svm functions require OpenCL 2.0 -#if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) +#if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) namespace detail { template<class T, class IndexExpr> @@ -131,8 
+131,18 @@ public: return m_context; } + bool operator==(const svm_ptr<T>& other) const + { + return (other.m_context == m_context) && (m_ptr == other.m_ptr); + } + + bool operator!=(const svm_ptr<T>& other) const + { + return (other.m_context != m_context) || (m_ptr != other.m_ptr); + } + // svm functions require OpenCL 2.0 - #if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// \internal_ template<class Expr> detail::svm_ptr_index_expr<T, Expr> diff --git a/boost/compute/memory_object.hpp b/boost/compute/memory_object.hpp index 14c4cf4c7e..75c8738f8f 100644 --- a/boost/compute/memory_object.hpp +++ b/boost/compute/memory_object.hpp @@ -38,7 +38,7 @@ public: use_host_ptr = CL_MEM_USE_HOST_PTR, alloc_host_ptr = CL_MEM_ALLOC_HOST_PTR, copy_host_ptr = CL_MEM_COPY_HOST_PTR - #ifdef CL_VERSION_1_2 + #ifdef BOOST_COMPUTE_CL_VERSION_1_2 , host_write_only = CL_MEM_HOST_WRITE_ONLY, host_read_only = CL_MEM_HOST_READ_ONLY, @@ -105,7 +105,7 @@ public: return detail::get_object_info<T>(clGetMemObjectInfo, m_mem, info); } - #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + #if defined(BOOST_COMPUTE_CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Registers a function to be called when the memory object is deleted /// and its resources freed. /// @@ -137,7 +137,7 @@ public: new boost::function<void()>(callback) ); } - #endif // CL_VERSION_1_1 + #endif // BOOST_COMPUTE_CL_VERSION_1_1 /// Returns \c true if the memory object is the same as \p other. 
bool operator==(const memory_object &other) const @@ -152,7 +152,7 @@ public: } private: - #ifdef CL_VERSION_1_1 + #ifdef BOOST_COMPUTE_CL_VERSION_1_1 /// \internal_ static void BOOST_COMPUTE_CL_CALLBACK destructor_callback_invoker(cl_mem, void *user_data) @@ -164,7 +164,7 @@ private: delete callback; } - #endif // CL_VERSION_1_1 + #endif // BOOST_COMPUTE_CL_VERSION_1_1 protected: /// \internal_ diff --git a/boost/compute/pipe.hpp b/boost/compute/pipe.hpp index 944674e622..907750a3ba 100644 --- a/boost/compute/pipe.hpp +++ b/boost/compute/pipe.hpp @@ -18,7 +18,7 @@ #include <boost/compute/detail/get_object_info.hpp> // pipe objects require opencl 2.0 -#if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) +#if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) namespace boost { namespace compute { @@ -149,6 +149,6 @@ struct set_kernel_arg<pipe> } // end compute namespace } // end boost namespace -#endif // CL_VERSION_2_0 +#endif // BOOST_COMPUTE_CL_VERSION_2_0 #endif // BOOST_COMPUTE_PIPE_HPP diff --git a/boost/compute/platform.hpp b/boost/compute/platform.hpp index 65fda84e11..fddfec8c11 100644 --- a/boost/compute/platform.hpp +++ b/boost/compute/platform.hpp @@ -181,7 +181,7 @@ public: /// function. Returns \c 0 if \p function_name is invalid. void* get_extension_function_address(const char *function_name) const { - #ifdef CL_VERSION_1_2 + #ifdef BOOST_COMPUTE_CL_VERSION_1_2 return clGetExtensionFunctionAddressForPlatform(m_platform, function_name); #else @@ -192,7 +192,7 @@ public: /// Requests that the platform unload any compiler resources. 
void unload_compiler() { - #ifdef CL_VERSION_1_2 + #ifdef BOOST_COMPUTE_CL_VERSION_1_2 clUnloadPlatformCompiler(m_platform); #else clUnloadCompiler(); diff --git a/boost/compute/program.hpp b/boost/compute/program.hpp index 7573aa02e6..e953d7a89e 100644 --- a/boost/compute/program.hpp +++ b/boost/compute/program.hpp @@ -272,13 +272,15 @@ public: } } - #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Compiles the program with \p options. /// /// \opencl_version_warning{1,2} /// /// \see_opencl_ref{clCompileProgram} - void compile(const std::string &options = std::string()) + void compile(const std::string &options = std::string(), + const std::vector<std::pair<std::string, program> > &headers = + std::vector<std::pair<std::string, program> >()) { const char *options_string = 0; @@ -286,9 +288,36 @@ public: options_string = options.c_str(); } - cl_int ret = clCompileProgram( - m_program, 0, 0, options_string, 0, 0, 0, 0, 0 - ); + cl_int ret; + if (headers.empty()) + { + ret = clCompileProgram( + m_program, 0, 0, options_string, 0, 0, 0, 0, 0 + ); + } + else + { + std::vector<const char*> header_names(headers.size()); + std::vector<cl_program> header_programs(headers.size()); + for (size_t i = 0; i < headers.size(); ++i) + { + header_names[i] = headers[i].first.c_str(); + header_programs[i] = headers[i].second.m_program; + } + + ret = clCompileProgram( + m_program, + 0, + 0, + options_string, + static_cast<cl_uint>(headers.size()), + header_programs.data(), + header_names.data(), + 0, + 0 + ); + } + if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); @@ -329,7 +358,7 @@ public: return program(program_, false); } - #endif // CL_VERSION_1_2 + #endif // BOOST_COMPUTE_CL_VERSION_1_2 /// Returns the build log. 
std::string build_log() const @@ -432,6 +461,33 @@ public: return create_with_source(source, context); } + /// Creates a new program with \p files in \p context. + /// + /// \see_opencl_ref{clCreateProgramWithSource} + static program create_with_source_file(const std::vector<std::string> &files, + const context &context) + { + std::vector<std::string> sources(files.size()); + + for(size_t i = 0; i < files.size(); ++i) { + // open file stream + std::ifstream stream(files[i].c_str()); + + if(stream.fail()){ + BOOST_THROW_EXCEPTION(std::ios_base::failure("failed to create stream.")); + } + + // read source + sources[i] = std::string( + (std::istreambuf_iterator<char>(stream)), + std::istreambuf_iterator<char>() + ); + } + + // create program + return create_with_source(sources, context); + } + /// Creates a new program with \p binary of \p binary_size in /// \p context. /// @@ -489,7 +545,7 @@ public: return create_with_binary(&binary[0], binary.size(), context); } - #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) + #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Creates a new program with the built-in kernels listed in /// \p kernel_names for \p devices in \p context. /// @@ -516,7 +572,7 @@ public: return program(program_, false); } - #endif // CL_VERSION_1_2 + #endif // BOOST_COMPUTE_CL_VERSION_1_2 /// Create a new program with \p source in \p context and builds it with \p options. /** @@ -543,10 +599,11 @@ public: .process( options ) .process( source ) ; + std::string hash_string = hash; // Try to get cached program binaries: try { - boost::optional<program> prog = load_program_binary(hash, context); + boost::optional<program> prog = load_program_binary(hash_string, context); if (prog) { prog->build(options); @@ -575,7 +632,7 @@ public: #ifdef BOOST_COMPUTE_USE_OFFLINE_CACHE // Save program binaries for future reuse. 
- save_program_binary(hash, prog); + save_program_binary(hash_string, prog); #endif return prog; @@ -637,12 +694,12 @@ BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(program, ((std::vector<unsigned char *>, CL_PROGRAM_BINARIES)) ) -#ifdef CL_VERSION_1_2 +#ifdef BOOST_COMPUTE_CL_VERSION_1_2 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(program, ((size_t, CL_PROGRAM_NUM_KERNELS)) ((std::string, CL_PROGRAM_KERNEL_NAMES)) ) -#endif // CL_VERSION_1_2 +#endif // BOOST_COMPUTE_CL_VERSION_1_2 } // end compute namespace } // end boost namespace diff --git a/boost/compute/svm.hpp b/boost/compute/svm.hpp index 4bc3a74237..da493a9388 100644 --- a/boost/compute/svm.hpp +++ b/boost/compute/svm.hpp @@ -16,7 +16,7 @@ #include <boost/compute/memory/svm_ptr.hpp> // svm functions require OpenCL 2.0 -#if defined(CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) +#if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) namespace boost { namespace compute { @@ -67,6 +67,6 @@ inline void svm_free(const context &context, svm_ptr<T> ptr) } // end compute namespace } // end boost namespace -#endif // CL_VERSION_2_0 +#endif // BOOST_COMPUTE_CL_VERSION_2_0 #endif // BOOST_COMPUTE_PIPE_HPP diff --git a/boost/compute/system.hpp b/boost/compute/system.hpp index e6ed353d68..5f29893b9d 100644 --- a/boost/compute/system.hpp +++ b/boost/compute/system.hpp @@ -55,6 +55,10 @@ public: /// name of the platform (e.g. "NVIDIA CUDA") /// \li \c BOOST_COMPUTE_DEFAULT_VENDOR - /// name of the device vendor (e.g. "NVIDIA") + /// \li \c BOOST_COMPUTE_DEFAULT_ENFORCE - + /// If this is set to "1", then throw a no_device_found() exception + /// if any of the above environment variables is set, but a matching + /// device was not found. /// /// The default device is determined once on the first time this function /// is called. 
Calling this function multiple times will always result in @@ -220,6 +224,7 @@ private: const char *type = detail::getenv("BOOST_COMPUTE_DEFAULT_DEVICE_TYPE"); const char *platform = detail::getenv("BOOST_COMPUTE_DEFAULT_PLATFORM"); const char *vendor = detail::getenv("BOOST_COMPUTE_DEFAULT_VENDOR"); + const char *enforce = detail::getenv("BOOST_COMPUTE_DEFAULT_ENFORCE"); if(name || type || platform || vendor){ for(size_t i = 0; i < devices_.size(); i++){ @@ -243,6 +248,9 @@ private: return device; } + + if(enforce && enforce[0] == '1') + BOOST_THROW_EXCEPTION(no_device_found()); } // find the first gpu device diff --git a/boost/compute/type_traits/type_definition.hpp b/boost/compute/type_traits/type_definition.hpp index de9095fbd2..3dcc4607fa 100644 --- a/boost/compute/type_traits/type_definition.hpp +++ b/boost/compute/type_traits/type_definition.hpp @@ -18,7 +18,10 @@ namespace compute { namespace detail { template<class T> -struct type_definition_trait; +struct type_definition_trait +{ + static std::string value() { return std::string(); } +}; } // end detail namespace diff --git a/boost/compute/types/tuple.hpp b/boost/compute/types/tuple.hpp index 095bd95448..bc2971e49e 100644 --- a/boost/compute/types/tuple.hpp +++ b/boost/compute/types/tuple.hpp @@ -206,7 +206,7 @@ inline meta_kernel& operator<<(meta_kernel &kernel, \ typedef typename boost::tuple<BOOST_PP_ENUM_PARAMS(n, T)> T; \ BOOST_STATIC_ASSERT(N < size_t(boost::tuples::length<T>::value)); \ kernel.inject_type<T>(); \ - return kernel << expr.m_arg << ".v" << uint_(N); \ + return kernel << expr.m_arg << ".v" << int_(N); \ } BOOST_PP_REPEAT_FROM_TO(1, BOOST_COMPUTE_MAX_ARITY, BOOST_COMPUTE_GET_N, ~) diff --git a/boost/compute/user_event.hpp b/boost/compute/user_event.hpp index a3fdba033e..6981b9c7f8 100644 --- a/boost/compute/user_event.hpp +++ b/boost/compute/user_event.hpp @@ -17,7 +17,7 @@ namespace boost { namespace compute { -#if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) +#if 
defined(BOOST_COMPUTE_CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// \class user_event /// \brief An user-created event. /// @@ -80,7 +80,7 @@ public: } } }; -#endif // CL_VERSION_1_1 +#endif // BOOST_COMPUTE_CL_VERSION_1_1 } // end compute namespace } // end boost namespace |