summaryrefslogtreecommitdiff
path: root/boost/compute/algorithm/detail/copy_on_device.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'boost/compute/algorithm/detail/copy_on_device.hpp')
-rw-r--r--boost/compute/algorithm/detail/copy_on_device.hpp190
1 files changed, 190 insertions, 0 deletions
diff --git a/boost/compute/algorithm/detail/copy_on_device.hpp b/boost/compute/algorithm/detail/copy_on_device.hpp
new file mode 100644
index 0000000000..0bcee27ed5
--- /dev/null
+++ b/boost/compute/algorithm/detail/copy_on_device.hpp
@@ -0,0 +1,190 @@
+//---------------------------------------------------------------------------//
+// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
+//
+// Distributed under the Boost Software License, Version 1.0
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt
+//
+// See http://boostorg.github.com/compute for more information.
+//---------------------------------------------------------------------------//
+
+#ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_ON_DEVICE_HPP
+#define BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_ON_DEVICE_HPP
+
+#include <iterator>
+
+#include <boost/compute/command_queue.hpp>
+#include <boost/compute/async/future.hpp>
+#include <boost/compute/iterator/buffer_iterator.hpp>
+#include <boost/compute/iterator/discard_iterator.hpp>
+#include <boost/compute/memory/svm_ptr.hpp>
+#include <boost/compute/detail/iterator_range_size.hpp>
+#include <boost/compute/detail/meta_kernel.hpp>
+#include <boost/compute/detail/parameter_cache.hpp>
+#include <boost/compute/detail/work_size.hpp>
+
+namespace boost {
+namespace compute {
+namespace detail {
+
+inline size_t pick_copy_work_group_size(size_t n, const device &device)
+{
+ (void) device;
+
+ if(n % 32 == 0) return 32;
+ else if(n % 16 == 0) return 16;
+ else if(n % 8 == 0) return 8;
+ else if(n % 4 == 0) return 4;
+ else if(n % 2 == 0) return 2;
+ else return 1;
+}
+
+template<class InputIterator, class OutputIterator>
+class copy_kernel : public meta_kernel
+{
+public:
+ copy_kernel(const device &device)
+ : meta_kernel("copy")
+ {
+ m_count = 0;
+
+ typedef typename std::iterator_traits<InputIterator>::value_type input_type;
+
+ boost::shared_ptr<parameter_cache> parameters =
+ detail::parameter_cache::get_global_cache(device);
+
+ std::string cache_key =
+ "__boost_copy_kernel_" + boost::lexical_cast<std::string>(sizeof(input_type));
+
+ m_vpt = parameters->get(cache_key, "vpt", 4);
+ m_tpb = parameters->get(cache_key, "tpb", 128);
+ }
+
+ void set_range(InputIterator first,
+ InputIterator last,
+ OutputIterator result)
+ {
+ m_count_arg = add_arg<uint_>("count");
+
+ *this <<
+ "uint index = get_local_id(0) + " <<
+ "(" << m_vpt * m_tpb << " * get_group_id(0));\n" <<
+ "for(uint i = 0; i < " << m_vpt << "; i++){\n" <<
+ " if(index < count){\n" <<
+ result[expr<uint_>("index")] << '=' <<
+ first[expr<uint_>("index")] << ";\n" <<
+ " index += " << m_tpb << ";\n"
+ " }\n"
+ "}\n";
+
+ m_count = detail::iterator_range_size(first, last);
+ }
+
+ event exec(command_queue &queue)
+ {
+ if(m_count == 0){
+ // nothing to do
+ return event();
+ }
+
+ size_t global_work_size = calculate_work_size(m_count, m_vpt, m_tpb);
+
+ set_arg(m_count_arg, uint_(m_count));
+
+ return exec_1d(queue, 0, global_work_size, m_tpb);
+ }
+
+private:
+ size_t m_count;
+ size_t m_count_arg;
+ uint_ m_vpt;
+ uint_ m_tpb;
+};
+
+template<class InputIterator, class OutputIterator>
+inline OutputIterator copy_on_device(InputIterator first,
+ InputIterator last,
+ OutputIterator result,
+ command_queue &queue)
+{
+ const device &device = queue.get_device();
+
+ copy_kernel<InputIterator, OutputIterator> kernel(device);
+
+ kernel.set_range(first, last, result);
+ kernel.exec(queue);
+
+ return result + std::distance(first, last);
+}
+
+template<class InputIterator>
+inline discard_iterator copy_on_device(InputIterator first,
+ InputIterator last,
+ discard_iterator result,
+ command_queue &queue)
+{
+ (void) queue;
+
+ return result + std::distance(first, last);
+}
+
+template<class InputIterator, class OutputIterator>
+inline future<OutputIterator> copy_on_device_async(InputIterator first,
+ InputIterator last,
+ OutputIterator result,
+ command_queue &queue)
+{
+ const device &device = queue.get_device();
+
+ copy_kernel<InputIterator, OutputIterator> kernel(device);
+
+ kernel.set_range(first, last, result);
+ event event_ = kernel.exec(queue);
+
+ return make_future(result + std::distance(first, last), event_);
+}
+
+#ifdef CL_VERSION_2_0
+// copy_on_device() specialization for svm_ptr
+template<class T>
+inline svm_ptr<T> copy_on_device(svm_ptr<T> first,
+ svm_ptr<T> last,
+ svm_ptr<T> result,
+ command_queue &queue)
+{
+ size_t count = iterator_range_size(first, last);
+ if(count == 0){
+ return result;
+ }
+
+ queue.enqueue_svm_memcpy(
+ result.get(), first.get(), count * sizeof(T)
+ );
+
+ return result + count;
+}
+
+template<class T>
+inline future<svm_ptr<T> > copy_on_device_async(svm_ptr<T> first,
+ svm_ptr<T> last,
+ svm_ptr<T> result,
+ command_queue &queue)
+{
+ size_t count = iterator_range_size(first, last);
+ if(count == 0){
+ return result;
+ }
+
+ event event_ = queue.enqueue_svm_memcpy_async(
+ result.get(), first.get(), count * sizeof(T)
+ );
+
+ return make_future(result + count, event_);
+}
+#endif // CL_VERSION_2_0
+
+} // end detail namespace
+} // end compute namespace
+} // end boost namespace
+
+#endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_ON_DEVICE_HPP