summary | refs | log | tree | commit | diff
path: root/boost/compute/algorithm/detail
diff options
context:
space:
mode:
Diffstat (limited to 'boost/compute/algorithm/detail')
-rw-r--r-- boost/compute/algorithm/detail/copy_on_device.hpp | 4
-rw-r--r-- boost/compute/algorithm/detail/copy_to_device.hpp | 4
-rw-r--r-- boost/compute/algorithm/detail/copy_to_host.hpp | 4
-rw-r--r-- boost/compute/algorithm/detail/find_extrema.hpp | 2
-rw-r--r-- boost/compute/algorithm/detail/find_extrema_with_reduce.hpp | 1
-rw-r--r-- boost/compute/algorithm/detail/find_if_with_atomics.hpp | 1
-rw-r--r-- boost/compute/algorithm/detail/merge_sort_on_gpu.hpp | 10
-rw-r--r-- boost/compute/algorithm/detail/radix_sort.hpp | 8
-rw-r--r-- boost/compute/algorithm/detail/serial_reduce.hpp | 1
-rw-r--r-- boost/compute/algorithm/detail/serial_reduce_by_key.hpp | 6
10 files changed, 26 insertions, 15 deletions
diff --git a/boost/compute/algorithm/detail/copy_on_device.hpp b/boost/compute/algorithm/detail/copy_on_device.hpp
index 8738c8c0b4..034b3bc212 100644
--- a/boost/compute/algorithm/detail/copy_on_device.hpp
+++ b/boost/compute/algorithm/detail/copy_on_device.hpp
@@ -144,7 +144,7 @@ inline future<OutputIterator> copy_on_device_async(InputIterator first,
return make_future(result + std::distance(first, last), event_);
}
-#ifdef CL_VERSION_2_0
+#ifdef BOOST_COMPUTE_CL_VERSION_2_0
// copy_on_device() specialization for svm_ptr
template<class T>
inline svm_ptr<T> copy_on_device(svm_ptr<T> first,
@@ -181,7 +181,7 @@ inline future<svm_ptr<T> > copy_on_device_async(svm_ptr<T> first,
return make_future(result + count, event_);
}
-#endif // CL_VERSION_2_0
+#endif // BOOST_COMPUTE_CL_VERSION_2_0
} // end detail namespace
} // end compute namespace
diff --git a/boost/compute/algorithm/detail/copy_to_device.hpp b/boost/compute/algorithm/detail/copy_to_device.hpp
index bce5975f53..8601bb20ec 100644
--- a/boost/compute/algorithm/detail/copy_to_device.hpp
+++ b/boost/compute/algorithm/detail/copy_to_device.hpp
@@ -124,7 +124,7 @@ inline future<DeviceIterator> copy_to_device_async(HostIterator first,
return make_future(result + static_cast<difference_type>(count), event_);
}
-#ifdef CL_VERSION_2_0
+#ifdef BOOST_COMPUTE_CL_VERSION_2_0
// copy_to_device() specialization for svm_ptr
template<class HostIterator, class T>
inline svm_ptr<T> copy_to_device(HostIterator first,
@@ -184,7 +184,7 @@ inline svm_ptr<T> copy_to_device_map(HostIterator first,
return result + count;
}
-#endif // CL_VERSION_2_0
+#endif // BOOST_COMPUTE_CL_VERSION_2_0
} // end detail namespace
} // end compute namespace
diff --git a/boost/compute/algorithm/detail/copy_to_host.hpp b/boost/compute/algorithm/detail/copy_to_host.hpp
index d770a996ef..89b57174fa 100644
--- a/boost/compute/algorithm/detail/copy_to_host.hpp
+++ b/boost/compute/algorithm/detail/copy_to_host.hpp
@@ -125,7 +125,7 @@ inline future<HostIterator> copy_to_host_async(DeviceIterator first,
return make_future(iterator_plus_distance(result, count), event_);
}
-#ifdef CL_VERSION_2_0
+#ifdef BOOST_COMPUTE_CL_VERSION_2_0
// copy_to_host() specialization for svm_ptr
template<class T, class HostIterator>
inline HostIterator copy_to_host(svm_ptr<T> first,
@@ -189,7 +189,7 @@ inline HostIterator copy_to_host_map(svm_ptr<T> first,
return iterator_plus_distance(result, count);
}
-#endif // CL_VERSION_2_0
+#endif // BOOST_COMPUTE_CL_VERSION_2_0
} // end detail namespace
} // end compute namespace
diff --git a/boost/compute/algorithm/detail/find_extrema.hpp b/boost/compute/algorithm/detail/find_extrema.hpp
index eef2e36c3c..734b75aa90 100644
--- a/boost/compute/algorithm/detail/find_extrema.hpp
+++ b/boost/compute/algorithm/detail/find_extrema.hpp
@@ -56,7 +56,7 @@ inline InputIterator find_extrema(InputIterator first,
// use serial method for OpenCL version 1.0 due to
// problems with atomic_cmpxchg()
- #ifndef CL_VERSION_1_1
+ #ifndef BOOST_COMPUTE_CL_VERSION_1_1
return serial_find_extrema(first, last, compare, find_minimum, queue);
#endif
diff --git a/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp b/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp
index 8f2a83c38b..515d7cc6da 100644
--- a/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp
+++ b/boost/compute/algorithm/detail/find_extrema_with_reduce.hpp
@@ -246,6 +246,7 @@ inline void find_extrema_with_reduce(InputIterator input,
);
}
+// Space complexity: \Omega(2 * work-group-size * work-groups-per-compute-unit)
template<class InputIterator, class Compare>
InputIterator find_extrema_with_reduce(InputIterator first,
InputIterator last,
diff --git a/boost/compute/algorithm/detail/find_if_with_atomics.hpp b/boost/compute/algorithm/detail/find_if_with_atomics.hpp
index 112c34cf00..e14fd12ae0 100644
--- a/boost/compute/algorithm/detail/find_if_with_atomics.hpp
+++ b/boost/compute/algorithm/detail/find_if_with_atomics.hpp
@@ -153,6 +153,7 @@ inline InputIterator find_if_with_atomics_multiple_vpt(InputIterator first,
return first + static_cast<difference_type>(index.read(queue));
}
+// Space complexity: O(1)
template<class InputIterator, class UnaryPredicate>
inline InputIterator find_if_with_atomics(InputIterator first,
InputIterator last,
diff --git a/boost/compute/algorithm/detail/merge_sort_on_gpu.hpp b/boost/compute/algorithm/detail/merge_sort_on_gpu.hpp
index e62c6beb8d..d5e1a2d8c9 100644
--- a/boost/compute/algorithm/detail/merge_sort_on_gpu.hpp
+++ b/boost/compute/algorithm/detail/merge_sort_on_gpu.hpp
@@ -91,6 +91,7 @@ inline size_t bitonic_block_sort(KeyIterator keys_first,
command_queue &queue)
{
typedef typename std::iterator_traits<KeyIterator>::value_type key_type;
+ typedef typename std::iterator_traits<ValueIterator>::value_type value_type;
meta_kernel k("bitonic_block_sort");
size_t count_arg = k.add_arg<const uint_>("count");
@@ -249,8 +250,11 @@ inline size_t bitonic_block_sort(KeyIterator keys_first,
k.var<key_type>("my_key") << ";\n";
if(sort_by_key)
{
- k << values_first[k.var<const uint_>("gid")] << " = " <<
- values_first[k.var<const uint_>("offset + my_index")] << ";\n";
+ k <<
+ k.decl<value_type>("my_value") << " = " <<
+ values_first[k.var<const uint_>("offset + my_index")] << ";\n" <<
+ "barrier(CLK_GLOBAL_MEM_FENCE);\n" <<
+ values_first[k.var<const uint_>("gid")] << " = my_value;\n";
}
k <<
// end if
@@ -418,7 +422,7 @@ inline void merge_blocks_on_gpu(KeyIterator keys_first,
");\n" <<
"left_idx = equal ? mid_idx + 1 : left_idx + 1;\n" <<
"right_idx = equal ? right_idx : mid_idx;\n" <<
- "upper_key = equal ? upper_key : " <<
+ "upper_key = " <<
keys_first[k.var<const uint_>("left_idx")] << ";\n" <<
"}\n" <<
"}\n" <<
diff --git a/boost/compute/algorithm/detail/radix_sort.hpp b/boost/compute/algorithm/detail/radix_sort.hpp
index 8e6d5f9c0a..53b1205c70 100644
--- a/boost/compute/algorithm/detail/radix_sort.hpp
+++ b/boost/compute/algorithm/detail/radix_sort.hpp
@@ -17,6 +17,9 @@
#include <boost/type_traits/is_signed.hpp>
#include <boost/type_traits/is_floating_point.hpp>
+#include <boost/mpl/and.hpp>
+#include <boost/mpl/not.hpp>
+
#include <boost/compute/kernel.hpp>
#include <boost/compute/program.hpp>
#include <boost/compute/command_queue.hpp>
@@ -305,9 +308,12 @@ inline void radix_sort_impl(const buffer_iterator<T> first,
options << " -DASC";
}
+ // get type definition if it is a custom struct
+ std::string custom_type_def = boost::compute::type_definition<T2>() + "\n";
+
// load radix sort program
program radix_sort_program = cache->get_or_build(
- cache_key, options.str(), radix_sort_source, context
+ cache_key, options.str(), custom_type_def + radix_sort_source, context
);
kernel count_kernel(radix_sort_program, "count");
diff --git a/boost/compute/algorithm/detail/serial_reduce.hpp b/boost/compute/algorithm/detail/serial_reduce.hpp
index 53aaf140fe..8b121274b9 100644
--- a/boost/compute/algorithm/detail/serial_reduce.hpp
+++ b/boost/compute/algorithm/detail/serial_reduce.hpp
@@ -20,6 +20,7 @@ namespace boost {
namespace compute {
namespace detail {
+// Space complexity: O(1)
template<class InputIterator, class OutputIterator, class BinaryFunction>
inline void serial_reduce(InputIterator first,
InputIterator last,
diff --git a/boost/compute/algorithm/detail/serial_reduce_by_key.hpp b/boost/compute/algorithm/detail/serial_reduce_by_key.hpp
index f9bda8e476..6fb04baa6d 100644
--- a/boost/compute/algorithm/detail/serial_reduce_by_key.hpp
+++ b/boost/compute/algorithm/detail/serial_reduce_by_key.hpp
@@ -55,11 +55,9 @@ inline size_t serial_reduce_by_key(InputKeyIterator keys_first,
size_t result_size_arg = k.add_arg<uint_ *>(memory_object::global_memory,
"result_size");
- convert<result_type> to_result_type;
-
k <<
k.decl<result_type>("result") <<
- " = " << to_result_type(values_first[0]) << ";\n" <<
+ " = " << values_first[0] << ";\n" <<
k.decl<key_type>("previous_key") << " = " << keys_first[0] << ";\n" <<
k.decl<result_type>("value") << ";\n" <<
k.decl<key_type>("key") << ";\n" <<
@@ -70,7 +68,7 @@ inline size_t serial_reduce_by_key(InputKeyIterator keys_first,
values_result[0] << " = result;\n" <<
"for(ulong i = 1; i < count; i++) {\n" <<
- " value = " << to_result_type(values_first[k.var<uint_>("i")]) << ";\n" <<
+ " value = " << values_first[k.var<uint_>("i")] << ";\n" <<
" key = " << keys_first[k.var<uint_>("i")] << ";\n" <<
" if (" << predicate(k.var<key_type>("previous_key"),
k.var<key_type>("key")) << ") {\n" <<