From 4fadd968fa12130524c8380f33fcfe25d4de79e5 Mon Sep 17 00:00:00 2001 From: DongHun Kwak Date: Wed, 13 Sep 2017 11:24:46 +0900 Subject: Imported Upstream version 1.65.0 Change-Id: Icf8400b375482cb11bcf77440a6934ba360d6ba4 Signed-off-by: DongHun Kwak --- boost/compute/algorithm/detail/copy_on_device.hpp | 4 ++-- boost/compute/algorithm/detail/copy_to_device.hpp | 4 ++-- boost/compute/algorithm/detail/copy_to_host.hpp | 4 ++-- boost/compute/algorithm/detail/find_extrema.hpp | 2 +- boost/compute/algorithm/detail/find_extrema_with_reduce.hpp | 1 + boost/compute/algorithm/detail/find_if_with_atomics.hpp | 1 + boost/compute/algorithm/detail/merge_sort_on_gpu.hpp | 10 +++++++--- boost/compute/algorithm/detail/radix_sort.hpp | 8 +++++++- boost/compute/algorithm/detail/serial_reduce.hpp | 1 + boost/compute/algorithm/detail/serial_reduce_by_key.hpp | 6 ++---- 10 files changed, 26 insertions(+), 15 deletions(-) (limited to 'boost/compute/algorithm/detail') diff --git a/boost/compute/algorithm/detail/copy_on_device.hpp b/boost/compute/algorithm/detail/copy_on_device.hpp index 8738c8c0b4..034b3bc212 100644 --- a/boost/compute/algorithm/detail/copy_on_device.hpp +++ b/boost/compute/algorithm/detail/copy_on_device.hpp @@ -144,7 +144,7 @@ inline future copy_on_device_async(InputIterator first, return make_future(result + std::distance(first, last), event_); } -#ifdef CL_VERSION_2_0 +#ifdef BOOST_COMPUTE_CL_VERSION_2_0 // copy_on_device() specialization for svm_ptr template inline svm_ptr copy_on_device(svm_ptr first, @@ -181,7 +181,7 @@ inline future > copy_on_device_async(svm_ptr first, return make_future(result + count, event_); } -#endif // CL_VERSION_2_0 +#endif // BOOST_COMPUTE_CL_VERSION_2_0 } // end detail namespace } // end compute namespace diff --git a/boost/compute/algorithm/detail/copy_to_device.hpp b/boost/compute/algorithm/detail/copy_to_device.hpp index bce5975f53..8601bb20ec 100644 --- a/boost/compute/algorithm/detail/copy_to_device.hpp +++ b/boost/compute/algorithm/detail/copy_to_device.hpp @@ -124,7 +124,7 @@ inline future copy_to_device_async(HostIterator first, return make_future(result + static_cast(count), event_); } -#ifdef CL_VERSION_2_0 +#ifdef BOOST_COMPUTE_CL_VERSION_2_0 // copy_to_device() specialization for svm_ptr template inline svm_ptr copy_to_device(HostIterator first, @@ -184,7 +184,7 @@ inline svm_ptr copy_to_device_map(HostIterator first, return result + count; } -#endif // CL_VERSION_2_0 +#endif // BOOST_COMPUTE_CL_VERSION_2_0 } // end detail namespace } // end compute namespace diff --git a/boost/compute/algorithm/detail/copy_to_host.hpp b/boost/compute/algorithm/detail/copy_to_host.hpp index d770a996ef..89b57174fa 100644 --- a/boost/compute/algorithm/detail/copy_to_host.hpp +++ b/boost/compute/algorithm/detail/copy_to_host.hpp @@ -125,7 +125,7 @@ inline future copy_to_host_async(DeviceIterator first, return make_future(iterator_plus_distance(result, count), event_); } -#ifdef CL_VERSION_2_0 +#ifdef BOOST_COMPUTE_CL_VERSION_2_0 // copy_to_host() specialization for svm_ptr template inline HostIterator copy_to_host(svm_ptr first, @@ -189,7 +189,7 @@ inline HostIterator copy_to_host_map(svm_ptr first, return iterator_plus_distance(result, count); } -#endif // CL_VERSION_2_0 +#endif // BOOST_COMPUTE_CL_VERSION_2_0 } // end detail namespace } // end compute namespace diff --git a/boost/compute/algorithm/detail/find_extrema.hpp b/boost/compute/algorithm/detail/find_extrema.hpp index eef2e36c3c..734b75aa90 100644 --- a/boost/compute/algorithm/detail/find_extrema.hpp +++ b/boost/compute/algorithm/detail/find_extrema.hpp @@ -56,7 +56,7 @@ inline InputIterator find_extrema(InputIterator first, // use serial method for OpenCL version 1.0 due to // problems with atomic_cmpxchg() - #ifndef CL_VERSION_1_1 + #ifndef BOOST_COMPUTE_CL_VERSION_1_1 return serial_find_extrema(first, last, compare, find_minimum, queue); #endif diff --git a/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp b/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp index 8f2a83c38b..515d7cc6da 100644 --- a/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp +++ b/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp @@ -246,6 +246,7 @@ inline void find_extrema_with_reduce(InputIterator input, ); } +// Space complexity: \Omega(2 * work-group-size * work-groups-per-compute-unit) template InputIterator find_extrema_with_reduce(InputIterator first, InputIterator last, diff --git a/boost/compute/algorithm/detail/find_if_with_atomics.hpp b/boost/compute/algorithm/detail/find_if_with_atomics.hpp index 112c34cf00..e14fd12ae0 100644 --- a/boost/compute/algorithm/detail/find_if_with_atomics.hpp +++ b/boost/compute/algorithm/detail/find_if_with_atomics.hpp @@ -153,6 +153,7 @@ inline InputIterator find_if_with_atomics_multiple_vpt(InputIterator first, return first + static_cast(index.read(queue)); } +// Space complexity: O(1) template inline InputIterator find_if_with_atomics(InputIterator first, InputIterator last, diff --git a/boost/compute/algorithm/detail/merge_sort_on_gpu.hpp b/boost/compute/algorithm/detail/merge_sort_on_gpu.hpp index e62c6beb8d..d5e1a2d8c9 100644 --- a/boost/compute/algorithm/detail/merge_sort_on_gpu.hpp +++ b/boost/compute/algorithm/detail/merge_sort_on_gpu.hpp @@ -91,6 +91,7 @@ inline size_t bitonic_block_sort(KeyIterator keys_first, command_queue &queue) { typedef typename std::iterator_traits::value_type key_type; + typedef typename std::iterator_traits::value_type value_type; meta_kernel k("bitonic_block_sort"); size_t count_arg = k.add_arg("count"); @@ -249,8 +250,11 @@ inline size_t bitonic_block_sort(KeyIterator keys_first, k.var("my_key") << ";\n"; if(sort_by_key) { - k << values_first[k.var("gid")] << " = " << - values_first[k.var("offset + my_index")] << ";\n"; + k << + k.decl("my_value") << " = " << + values_first[k.var("offset + my_index")] << ";\n" << + "barrier(CLK_GLOBAL_MEM_FENCE);\n" << + values_first[k.var("gid")] << " = my_value;\n"; } k << // end if @@ -418,7 +422,7 @@ inline void merge_blocks_on_gpu(KeyIterator keys_first, ");\n" << "left_idx = equal ? mid_idx + 1 : left_idx + 1;\n" << "right_idx = equal ? right_idx : mid_idx;\n" << - "upper_key = equal ? upper_key : " << + "upper_key = " << keys_first[k.var("left_idx")] << ";\n" << "}\n" << "}\n" << diff --git a/boost/compute/algorithm/detail/radix_sort.hpp b/boost/compute/algorithm/detail/radix_sort.hpp index 8e6d5f9c0a..53b1205c70 100644 --- a/boost/compute/algorithm/detail/radix_sort.hpp +++ b/boost/compute/algorithm/detail/radix_sort.hpp @@ -17,6 +17,9 @@ #include #include +#include +#include + #include #include #include @@ -305,9 +308,12 @@ inline void radix_sort_impl(const buffer_iterator first, options << " -DASC"; } + // get type definition if it is a custom struct + std::string custom_type_def = boost::compute::type_definition() + "\n"; + // load radix sort program program radix_sort_program = cache->get_or_build( - cache_key, options.str(), radix_sort_source, context + cache_key, options.str(), custom_type_def + radix_sort_source, context ); kernel count_kernel(radix_sort_program, "count"); diff --git a/boost/compute/algorithm/detail/serial_reduce.hpp b/boost/compute/algorithm/detail/serial_reduce.hpp index 53aaf140fe..8b121274b9 100644 --- a/boost/compute/algorithm/detail/serial_reduce.hpp +++ b/boost/compute/algorithm/detail/serial_reduce.hpp @@ -20,6 +20,7 @@ namespace boost { namespace compute { namespace detail { +// Space complexity: O(1) template inline void serial_reduce(InputIterator first, InputIterator last, diff --git a/boost/compute/algorithm/detail/serial_reduce_by_key.hpp b/boost/compute/algorithm/detail/serial_reduce_by_key.hpp index f9bda8e476..6fb04baa6d 100644 --- a/boost/compute/algorithm/detail/serial_reduce_by_key.hpp +++ b/boost/compute/algorithm/detail/serial_reduce_by_key.hpp @@ -55,11 +55,9 @@ inline size_t serial_reduce_by_key(InputKeyIterator keys_first, size_t result_size_arg = k.add_arg(memory_object::global_memory, "result_size"); - convert to_result_type; - k << k.decl("result") << - " = " << to_result_type(values_first[0]) << ";\n" << + " = " << values_first[0] << ";\n" << k.decl("previous_key") << " = " << keys_first[0] << ";\n" << k.decl("value") << ";\n" << k.decl("key") << ";\n" << @@ -70,7 +68,7 @@ inline size_t serial_reduce_by_key(InputKeyIterator keys_first, values_result[0] << " = result;\n" << "for(ulong i = 1; i < count; i++) {\n" << - " value = " << to_result_type(values_first[k.var("i")]) << ";\n" << + " value = " << values_first[k.var("i")] << ";\n" << " key = " << keys_first[k.var("i")] << ";\n" << " if (" << predicate(k.var("previous_key"), k.var("key")) << ") {\n" << -- cgit v1.2.3